diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 2c2615e2b..5fb7c42d6 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,71 +1,78 @@
 # Arch files
-/.github/ @oneapi-src/onemkl-arch-write
-/.gitignore/ @oneapi-src/onemkl-arch-write
-/cmake/ @oneapi-src/onemkl-arch-write
-/deps/ @oneapi-src/onemkl-arch-write
-/docs/ @oneapi-src/onemkl-arch-write
-/examples/include/ @oneapi-src/onemkl-arch-write
-/examples/CMakeList.txt @oneapi-src/onemkl-arch-write
-/examples/README.md @oneapi-src/onemkl-arch-write
-/include/oneapi/mkl.hpp @oneapi-src/onemkl-arch-write
-/include/oneapi/mkl/detail/ @oneapi-src/onemkl-arch-write
-/include/oneapi/mkl/exceptions.hpp @oneapi-src/onemkl-arch-write
-/scripts/ @oneapi-src/onemkl-arch-write
-/src/include/ @oneapi-src/onemkl-arch-write
-/src/CMakeLists.txt @oneapi-src/onemkl-arch-write
-/src/config.hpp.in @oneapi-src/onemkl-arch-write
-/tests/CMakeLists.txt @oneapi-src/onemkl-arch-write
-/tests/README.md @oneapi-src/onemkl-arch-write
-/tests/unit_tests/include/ @oneapi-src/onemkl-arch-write
-/tests/unit_tests/CMakeLists.txt @oneapi-src/onemkl-arch-write
-/tests/unit_tests/main_test.cpp @oneapi-src/onemkl-arch-write
-/third-party-programs/ @oneapi-src/onemkl-arch-write
-/CMakeLists.txt @oneapi-src/onemkl-arch-write
-/CODE_OF_CONDUCT.md @oneapi-src/onemkl-arch-write
-/CONTRIBUTING.md @oneapi-src/onemkl-arch-write
-/LICENSE @oneapi-src/onemkl-arch-write
-/MAINTAINERS.md @oneapi-src/onemkl-arch-write
-/README.md @oneapi-src/onemkl-arch-write
-/SECURITY.md @oneapi-src/onemkl-arch-write
-/_clang-format @oneapi-src/onemkl-arch-write
-/legal_information.md @oneapi-src/onemkl-arch-write
+/.github/ @uxlfoundation/onemath-arch-write
+/.gitignore @uxlfoundation/onemath-arch-write
+/cmake/ @uxlfoundation/onemath-arch-write
+/deps/ @uxlfoundation/onemath-arch-write
+/docs/ @uxlfoundation/onemath-arch-write
+/examples/include/ @uxlfoundation/onemath-arch-write
+/examples/CMakeLists.txt @uxlfoundation/onemath-arch-write
+/examples/README.md @uxlfoundation/onemath-arch-write
+/include/oneapi/math.hpp @uxlfoundation/onemath-arch-write
+/include/oneapi/math/detail/ @uxlfoundation/onemath-arch-write
+/include/oneapi/mkl.hpp @uxlfoundation/onemath-arch-write
+/include/oneapi/mkl/namespace_alias.hpp @uxlfoundation/onemath-arch-write
+/scripts/ @uxlfoundation/onemath-arch-write
+/src/include/ @uxlfoundation/onemath-arch-write
+/src/CMakeLists.txt @uxlfoundation/onemath-arch-write
+/src/config.hpp.in @uxlfoundation/onemath-arch-write
+/tests/CMakeLists.txt @uxlfoundation/onemath-arch-write
+/tests/README.md @uxlfoundation/onemath-arch-write
+/tests/unit_tests/include/ @uxlfoundation/onemath-arch-write
+/tests/unit_tests/CMakeLists.txt @uxlfoundation/onemath-arch-write
+/tests/unit_tests/main_test.cpp @uxlfoundation/onemath-arch-write
+/third-party-programs/ @uxlfoundation/onemath-arch-write
+/CMakeLists.txt @uxlfoundation/onemath-arch-write
+/CODE_OF_CONDUCT.md @uxlfoundation/onemath-arch-write
+/CONTRIBUTING.md @uxlfoundation/onemath-arch-write
+/LICENSE @uxlfoundation/onemath-arch-write
+/MAINTAINERS.md @uxlfoundation/onemath-arch-write
+/README.md @uxlfoundation/onemath-arch-write
+/SECURITY.md @uxlfoundation/onemath-arch-write
+/_clang-format @uxlfoundation/onemath-arch-write
+/legal_information.md @uxlfoundation/onemath-arch-write
 
 # BLAS files
-/examples/blas/ @oneapi-src/onemkl-blas-write
-/include/oneapi/mkl/blas/ @oneapi-src/onemkl-blas-write
-/include/oneapi/mkl/blas.hxx @oneapi-src/onemkl-blas-write
-/include/oneapi/mkl/blas.hpp @oneapi-src/onemkl-blas-write
-/include/oneapi/mkl/bfloat16.hpp @oneapi-src/onemkl-blas-write
-/src/blas/ @oneapi-src/onemkl-blas-write
-/tests/unit_tests/blas/ @oneapi-src/onemkl-blas-write
+/examples/blas/ @uxlfoundation/onemath-blas-write
+/include/oneapi/math/blas/ @uxlfoundation/onemath-blas-write
+/include/oneapi/math/blas.hpp @uxlfoundation/onemath-blas-write
+/include/oneapi/math/blas.hxx @uxlfoundation/onemath-blas-write
+/include/oneapi/math/bfloat16.hpp @uxlfoundation/onemath-blas-write
+/include/oneapi/mkl/blas.hpp @uxlfoundation/onemath-blas-write
+/src/blas/ @uxlfoundation/onemath-blas-write
+/tests/unit_tests/blas/ @uxlfoundation/onemath-blas-write
 
 # DFT files
-/examples/dft/ @oneapi-src/onemkl-dft-write
-/include/oneapi/mkl/dft/ @oneapi-src/onemkl-dft-write
-/include/oneapi/mkl/dft.hpp @oneapi-src/onemkl-dft-write
-/src/dft/ @oneapi-src/onemkl-dft-write
-/tests/unit_tests/dft/ @oneapi-src/onemkl-dft-write
+/examples/dft/ @uxlfoundation/onemath-dft-write
+/include/oneapi/math/dft/ @uxlfoundation/onemath-dft-write
+/include/oneapi/math/dft.hpp @uxlfoundation/onemath-dft-write
+/include/oneapi/mkl/dft.hpp @uxlfoundation/onemath-dft-write
+/src/dft/ @uxlfoundation/onemath-dft-write
+/tests/unit_tests/dft/ @uxlfoundation/onemath-dft-write
 
 # LAPACK files
-/examples/lapack/ @oneapi-src/onemkl-lapack-write
-/include/oneapi/mkl/lapack/ @oneapi-src/onemkl-lapack-write
-/include/oneapi/mkl/lapack.hpp @oneapi-src/onemkl-lapack-write
-/src/lapack/ @oneapi-src/onemkl-lapack-write
-/tests/unit_tests/lapack/ @oneapi-src/onemkl-lapack-write
+/examples/lapack/ @uxlfoundation/onemath-lapack-write
+/include/oneapi/math/lapack/ @uxlfoundation/onemath-lapack-write
+/include/oneapi/math/lapack.hpp @uxlfoundation/onemath-lapack-write
+/include/oneapi/mkl/lapack.hpp @uxlfoundation/onemath-lapack-write
+/src/lapack/ @uxlfoundation/onemath-lapack-write
+/tests/unit_tests/lapack/ @uxlfoundation/onemath-lapack-write
 
 # RNG files
-/examples/rng/ @oneapi-src/onemkl-rng-write
-/include/oneapi/mkl/rng/ @oneapi-src/onemkl-rng-write
-/include/oneapi/mkl/rng.hpp @oneapi-src/onemkl-rng-write
-/src/rng/ @oneapi-src/onemkl-rng-write
-/tests/unit_tests/rng/ @oneapi-src/onemkl-rng-write
+/examples/rng/ @uxlfoundation/onemath-rng-write
+/include/oneapi/math/rng/ @uxlfoundation/onemath-rng-write
+/include/oneapi/math/rng.hpp @uxlfoundation/onemath-rng-write
+/include/oneapi/mkl/rng.hpp @uxlfoundation/onemath-rng-write
+/include/oneapi/mkl/rng/device.hpp @uxlfoundation/onemath-rng-write
+/src/rng/ @uxlfoundation/onemath-rng-write
+/tests/unit_tests/rng/ @uxlfoundation/onemath-rng-write
 
 # Sparse BLAS files
-/examples/sparse_blas/ @oneapi-src/onemkl-sparse-write
-/include/oneapi/mkl/sparse_blas/ @oneapi-src/onemkl-sparse-write
-/include/oneapi/mkl/sparse_blas.hpp @oneapi-src/onemkl-sparse-write
-/src/sparse_blas/ @oneapi-src/onemkl-sparse-write
-/tests/unit_tests/sparse_blas/ @oneapi-src/onemkl-sparse-write
+/examples/sparse_blas/ @uxlfoundation/onemath-sparse-write
+/include/oneapi/math/sparse_blas/ @uxlfoundation/onemath-sparse-write
+/include/oneapi/math/sparse_blas.hpp @uxlfoundation/onemath-sparse-write
+/include/oneapi/mkl/sparse_blas.hpp @uxlfoundation/onemath-sparse-write
+/src/sparse_blas/ @uxlfoundation/onemath-sparse-write
+/tests/unit_tests/sparse_blas/ @uxlfoundation/onemath-sparse-write
 
 # Shared files
-/include/oneapi/mkl/types.hpp @oneapi-src/onemkl-blas-write @oneapi-src/onemkl-lapack-write
+/include/oneapi/math/types.hpp @uxlfoundation/onemath-blas-write @uxlfoundation/onemath-lapack-write
diff --git a/.github/ISSUE_TEMPLATE/RFC.md b/.github/ISSUE_TEMPLATE/RFC.md
index 1fa6e4fb4..ad7a100a2 100755
--- a/.github/ISSUE_TEMPLATE/RFC.md
+++ b/.github/ISSUE_TEMPLATE/RFC.md
@@ -9,7 +9,7 @@ assignees: ''
 # Summary
 Include a short summary of the request. Sections below provide guidance on
 what factors are considered important. Describe how new interface will meet
-[library functionality guidelines](https://github.com/oneapi-src/oneMKL/blob/master/CONTRIBUTING.md#library-functionality-guidelines).
+[library functionality guidelines](https://github.com/uxlfoundation/oneMath/blob/master/CONTRIBUTING.md#library-functionality-guidelines).
 
 # Problem statement
 Describe the problem you are trying to solve with reasonable level of details.
@@ -18,4 +18,4 @@ Describe the problem you are trying to solve with reasonable level of details.
 * The definition of the function including interface and semantics. How this
 interface will be extendable for different HW implementations.
 * What existing libraries have implementation of this function and can be used
-under oneMKL interface.
+under oneMath.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index f6b78f782..24eb32da5 100755
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -11,11 +11,11 @@ Provide a short summary of the issue. Sections below provide guidance on what
 factors are considered important to reproduce an issue.
 
 # Version
-Report oneMKL version and githash.
+Report oneMath version and githash.
 If it is a regression, report githash for the last known good revision.
 
 # Environment
-oneMKL works with multiple HW and backend libraries and also depends on the
+oneMath works with multiple HW and backend libraries and also depends on the
 compiler and build environment. Include
 the following information to help reproduce the issue:
 * HW you use
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 7ad921d8a..c71757115 100755
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -2,7 +2,7 @@
 
 Please include a summary of the change. Please also include relevant
 motivation and context. See
-[contribution guidelines](https://github.com/oneapi-src/oneMKL/blob/master/CONTRIBUTING.md)
+[contribution guidelines](https://github.com/uxlfoundation/oneMath/blob/master/CONTRIBUTING.md)
 for more details. If the change fixes an issue not documented in the project's
 Github issue tracker, please document all steps necessary to reproduce it.
 
diff --git a/.github/scripts/domain-check.js b/.github/scripts/domain-check.js
index ed68c2e42..7a6368ec2 100644
--- a/.github/scripts/domain-check.js
+++ b/.github/scripts/domain-check.js
@@ -14,7 +14,7 @@ function matchesPattern(domain, filePaths) {
       !filePath.startsWith("third-party-programs/"),
   );
   // These directories contain domain specific code
-  const dirs = "(tests/unit_tests|examples|src|include/oneapi/mkl)";
+  const dirs = "(tests/unit_tests|examples|src|include/oneapi/math)";
   const domains = "(blas|lapack|rng|dft)";
   // matches changes to the domain of interest or non domain-specific code
   const re = new RegExp(`^(${dirs}/${domain}|(?!${dirs}/${domains}))`);
@@ -92,7 +92,7 @@ test_patterns = [
   },
   {
     domain: "lapack",
-    files: ["include/oneapi/mkl/lapack/lapack.hpp"],
+    files: ["include/oneapi/math/lapack/lapack.hpp"],
     expected: true,
   },
   {
@@ -117,7 +117,7 @@ test_patterns = [
   },
   {
     domain: "lapack",
-    files: ["include/oneapi/mkl/rng/lapack.hpp"],
+    files: ["include/oneapi/math/rng/lapack.hpp"],
     expected: false,
   },
   {
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 98c1b6614..4d713d5ed 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -41,15 +41,15 @@ jobs:
           domain: dft
           build_options: -DENABLE_PORTFFT_BACKEND=ON -DENABLE_MKLCPU_BACKEND=OFF
           test_options: -R 'DFT/CT/.*ComputeTests_in_place_COMPLEX.COMPLEX_SINGLE_in_place_buffer.sizes_8_batches_1*'
-        - config: MKL BLAS
+        - config: oneMath BLAS
           domain: blas
           build_options: -DREF_BLAS_ROOT=${PWD}/lapack/install
-        - config: MKL DFT
+        - config: oneMath DFT
           domain: dft
-        - config: MKL LAPACK
+        - config: oneMath LAPACK
           domain: lapack
           build_options: -DREF_LAPACK_ROOT=${PWD}/lapack/install
-        - config: MKL RNG
+        - config: oneMath RNG
           domain: rng
     name: unit tests ${{ matrix.config }} CPU
     steps:
diff --git a/.github/workflows/slack-pr.yaml b/.github/workflows/slack-pr.yaml
index 4c5f3df7d..5c8b466a6 100644
--- a/.github/workflows/slack-pr.yaml
+++ b/.github/workflows/slack-pr.yaml
@@ -22,7 +22,7 @@ on:
 
 env:
   SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-  channel: "onemkl"
+  channel: "onemath"
 
 permissions:
   pull-requests: read
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76f5aedc2..e0c2de1ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ option(BUILD_SHARED_LIBS "Build dynamic libraries" ON)
 option(ENABLE_MKLCPU_BACKEND "Enable the Intel oneMKL CPU backend for supported interfaces" ON)
 option(ENABLE_MKLGPU_BACKEND "Enable the Intel oneMKL GPU backend for supported interfaces" ON)
 if(ENABLE_MKLCPU_BACKEND)
-  option(ENABLE_MKLCPU_THREAD_TBB "Enable the use of Intel TBB with the oneMKL CPU backend" ON)
+  option(ENABLE_MKLCPU_THREAD_TBB "Enable the use of Intel TBB with the oneMath CPU backend" ON)
 endif()
 
 # blas
@@ -63,7 +63,7 @@ option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interf
 # sparse
 option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
 
-set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
+set(ONEMATH_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
 set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
 
 ## Testing
@@ -128,7 +128,7 @@ if (ENABLE_PORTFFT_BACKEND
 endif()
 
 # Define required CXX compilers before project
-if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
+if(CMAKE_CXX_COMPILER OR NOT ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
   if(WIN32)
     string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
   endif()
@@ -169,7 +169,7 @@ else()
 endif()
 
 # Define required C compilers before project
-if(CMAKE_C_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
+if(CMAKE_C_COMPILER OR NOT ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
   if(WIN32)
     string(REPLACE "\\" "/" CMAKE_C_COMPILER ${CMAKE_C_COMPILER})
   endif()
@@ -189,10 +189,10 @@ else()
   endif()
 endif()
 
-project(oneMKL VERSION 0.5.0 LANGUAGES CXX)
+project(oneMath VERSION 0.5.0 LANGUAGES CXX)
 
 # Override default CXX compile/link lines for Windows after project
-if(WIN32 AND ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
+if(WIN32 AND ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function -w")
   foreach (flag_var
            CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@@ -208,7 +208,7 @@ if(WIN32 AND ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
 endif()
 
 # Temporary disable sycl 2020 deprecations warnings for cuSOLVER and rocSOLVER
-if(ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++" AND (ENABLE_ROCSOLVER_BACKEND))
+if(ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++" AND (ENABLE_ROCSOLVER_BACKEND))
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSYCL2020_DISABLE_DEPRECATION_WARNINGS")
 endif()
 
@@ -233,7 +233,7 @@ else()
 endif()
 message(STATUS "TARGET_DOMAINS: ${TARGET_DOMAINS}")
 
-# Include Intel oneMKL
+# Include Intel(R) oneAPI Math Kernel Library (oneMKL)
 if(ENABLE_MKLGPU_BACKEND OR ENABLE_MKLCPU_BACKEND)
   set(MKL_ARCH intel64)
   set(MKL_INTERFACE ilp64)
@@ -272,23 +272,23 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
 
 # Add DPC++ options for Linux
 if(WIN32)
-  add_library(ONEMKL::SYCL::SYCL INTERFACE IMPORTED)
+  add_library(ONEMATH::SYCL::SYCL INTERFACE IMPORTED)
 else()
   # Find necessary packages
-  if(ONEMKL_SYCL_IMPLEMENTATION)
-    string( TOLOWER "${ONEMKL_SYCL_IMPLEMENTATION}" ONEMKL_SYCL_IMPLEMENTATION)
-    if (ONEMKL_SYCL_IMPLEMENTATION STREQUAL "hipsycl")
+  if(ONEMATH_SYCL_IMPLEMENTATION)
+    string( TOLOWER "${ONEMATH_SYCL_IMPLEMENTATION}" ONEMATH_SYCL_IMPLEMENTATION)
+    if (ONEMATH_SYCL_IMPLEMENTATION STREQUAL "hipsycl")
       message(STATUS "Looking for hipSYCL")
       find_package(hipSYCL CONFIG REQUIRED)
       set(USE_ADD_SYCL_TO_TARGET_INTEGRATION true)
       set (CMAKE_CXX_STANDARD 17)
-      add_library(ONEMKL::SYCL::SYCL INTERFACE IMPORTED)
-    elseif(ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
+      add_library(ONEMATH::SYCL::SYCL INTERFACE IMPORTED)
+    elseif(ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
       message(STATUS "Looking for dpc++")
       set(USE_ADD_SYCL_TO_TARGET_INTEGRATION false)
       find_package(Compiler REQUIRED)
     else()
-      message(FATAL_ERROR "SYCL implementation ${ONEMKL_SYCL_IMPLEMENTATION} is not known")
+      message(FATAL_ERROR "SYCL implementation ${ONEMATH_SYCL_IMPLEMENTATION} is not known")
     endif()
   else()
     message(STATUS "Looking for dpc++")
@@ -298,8 +298,8 @@ else()
 endif()
 
 if(DEFINED REF_BLAS_ROOT)
-  find_file(ONEMKL_REF_BLAS_LIBNAME NAMES blas.dll libblas.so HINTS ${REF_BLAS_ROOT} PATH_SUFFIXES lib lib64)
-  find_file(ONEMKL_REF_CBLAS_LIBNAME NAMES cblas.dll libcblas.so HINTS ${REF_BLAS_ROOT} PATH_SUFFIXES lib lib64)
+  find_file(ONEMATH_REF_BLAS_LIBNAME NAMES blas.dll libblas.so HINTS ${REF_BLAS_ROOT} PATH_SUFFIXES lib lib64)
+  find_file(ONEMATH_REF_CBLAS_LIBNAME NAMES cblas.dll libcblas.so HINTS ${REF_BLAS_ROOT} PATH_SUFFIXES lib lib64)
 endif()
 
 # Add source directory and output to bin/
@@ -330,30 +330,30 @@ install(DIRECTORY include/
 
 include(CMakePackageConfigHelpers)
 write_basic_package_version_file(
-  "${CMAKE_CURRENT_BINARY_DIR}/oneMKLConfigVersion.cmake"
+  "${CMAKE_CURRENT_BINARY_DIR}/oneMathConfigVersion.cmake"
   VERSION ${PROJECT_VERSION}
   COMPATIBILITY AnyNewerVersion
 )
 
-export(EXPORT oneMKLTargets
-  FILE "${CMAKE_CURRENT_BINARY_DIR}/oneMKLTargets.cmake"
-  NAMESPACE ONEMKL::
+export(EXPORT oneMathTargets
+  FILE "${CMAKE_CURRENT_BINARY_DIR}/oneMathTargets.cmake"
+  NAMESPACE ONEMATH::
 )
-configure_file("${PROJECT_SOURCE_DIR}/cmake/oneMKLConfig.cmake"
-  "${CMAKE_CURRENT_BINARY_DIR}/oneMKLConfig.cmake"
+configure_file("${PROJECT_SOURCE_DIR}/cmake/oneMathConfig.cmake"
+  "${CMAKE_CURRENT_BINARY_DIR}/oneMathConfig.cmake"
   COPYONLY
 )
 
 set(config_package_location "lib/cmake/${PROJECT_NAME}")
-install(EXPORT oneMKLTargets
-  FILE oneMKLTargets.cmake
-  NAMESPACE MKL::
+install(EXPORT oneMathTargets
+  FILE oneMathTargets.cmake
+  NAMESPACE ONEMATH::
   DESTINATION ${config_package_location}
 )
 install(
   FILES
-  "${PROJECT_SOURCE_DIR}/cmake/oneMKLConfig.cmake"
-  "${CMAKE_CURRENT_BINARY_DIR}/oneMKLConfigVersion.cmake"
+  "${PROJECT_SOURCE_DIR}/cmake/oneMathConfig.cmake"
+  "${CMAKE_CURRENT_BINARY_DIR}/oneMathConfigVersion.cmake"
   DESTINATION ${config_package_location}
   COMPONENT Devel
 )
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7781b9c6e..3d1a2300d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,5 @@
 # Contributing Guidelines
-If you have improvements, new libraries integrated under oneAPI Math Kernel Library (oneMKL) Interfaces, or new interfaces to contribute to the oneMKL Specification, please send us your pull requests! For getting started, see GitHub [howto](https://help.github.com/en/articles/about-pull-requests).
+If you have improvements, new libraries integrated under the oneAPI Math Library (oneMath), or new interfaces to contribute to the oneMath Specification, please send us your pull requests! For getting started, see GitHub [howto](https://help.github.com/en/articles/about-pull-requests).
 
 For how to enable a new third-party library, see the [guidelines](docs/create_new_backend.rst).
 
@@ -15,11 +15,11 @@ Before sending your pull requests, ensure that you follow this checklist:
 
 * Ensure that [unit tests](CONTRIBUTING.md#unit-tests) pass. Include logs from tests as attachments to the pull request.
 
-* Ensure that corresponding [maintainer GitHub team](#onemkl-interfaces-maintainers) is assigned to the PR review.
+* Ensure that corresponding [maintainer GitHub team](#onemath-maintainers) is assigned to the PR review.
 
 ## Library Functionality Guidelines
 
-oneMKL focuses on the following criteria:
+oneMath focuses on the following criteria:
 
 1. *Performance*: Functionality that has highly optimized and extensively parallelized routines for applications that require maximum performance.
 
@@ -31,13 +31,13 @@ oneMKL focuses on the following criteria:
 
 3. *Complexity*: Functionality that is not trivial to implement directly or by combining existing primitives.
 
-For the new API to become a part of the open source project, it should be accepted as part of [oneMKL spec](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/).
+For the new API to become a part of the open source project, it should be accepted as part of [oneMath spec](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/).
 
 
 ### Request for Comments Process
 
 For changes impacting the public API or any significant changes in the library, such as adding new backend or changes to the architecture,
-please follow the [RFC process](https://github.com/oneapi-src/oneMKL/tree/rfcs).
+please follow the [RFC process](https://github.com/uxlfoundation/oneMath/tree/rfcs).
 
 Please also provide the following details as part of the RFC:
 
@@ -45,33 +45,33 @@ Please also provide the following details as part of the RFC:
 
 * The definition of the function including the interface and semantics, and how this interface will be extendable for different HW implementations.
 
-* What existing libraries have implementations of this function and can be used under the oneMKL interface.
+* What existing libraries have implementations of this function and can be used under oneMath.
 
-* Ensure that corresponding [maintainer GitHub team](#onemkl-interfaces-maintainers) is assigned to the RFC review.
+* Ensure that corresponding [maintainer GitHub team](#onemath-maintainers) is assigned to the RFC review.
 
 ## Bug Reporting
 
-If you find a bug or problem, please open a request under [Issues](https://github.com/oneapi-src/oneMKL/issues).
+If you find a bug or problem, please open a request under [Issues](https://github.com/uxlfoundation/oneMath/issues).
 
 
 ## Security Issues
 
 Report security issues to onemkl.maintainers@intel.com.
 
-## oneMKL Interfaces Maintainers
+## oneMath Maintainers
 
 For GitHub questions, issues, RFCs, or PRs you can contact maintainers via one of the following GitHub teams based on the topic:
 
 | GitHub team name | Description |
 :-----------|:------------|
-| @oneapi-src/onemkl-maintain  | All oneMKL Interfaces maintainers |
-| @oneapi-src/onemkl-arch-write | oneMKL Interfaces Architecture maintainers |
-| @oneapi-src/onemkl-blas-write | oneMKL Interfaces BLAS maintainers |
-| @oneapi-src/onemkl-dft-write | oneMKL Interfaces DFT maintainers |
-| @oneapi-src/onemkl-lapack-write | oneMKL Interfaces LAPACK maintainers |
-| @oneapi-src/onemkl-rng-write | oneMKL Interfaces RNG maintainers |
-| @oneapi-src/onemkl-sparse-write | oneMKL Interfaces Sparse Algebra maintainers |
-| @oneapi-src/onemkl-vm-write | oneMKL Interfaces Vector Math maintainers |
+| @uxlfoundation/onemath-maintain  | All oneMath maintainers |
+| @uxlfoundation/onemath-arch-write | oneMath Architecture maintainers |
+| @uxlfoundation/onemath-blas-write | oneMath BLAS maintainers |
+| @uxlfoundation/onemath-dft-write | oneMath DFT maintainers |
+| @uxlfoundation/onemath-lapack-write | oneMath LAPACK maintainers |
+| @uxlfoundation/onemath-rng-write | oneMath RNG maintainers |
+| @uxlfoundation/onemath-sparse-write | oneMath Sparse Algebra maintainers |
+| @uxlfoundation/onemath-vm-write | oneMath Vector Math maintainers |
 
 Please read [MAINTAINERS page](MAINTAINERS.md) for more information about maintainer roles, responsibilities, and how to become one of them.
 
@@ -81,12 +81,12 @@ The general principle is to follow the style of existing/surrounding code. If yo
 ```sh
 clang-format -style=file -i foo.cpp
 ```
-This formats code using the `_clang_format` file found in the oneMKL top-level directory. The version of `clang-format` is specified in [`.pre-commit-config.yaml`](https://github.com/oneapi-src/oneMKL/blob/develop/.pre-commit-config.yaml). Alternatively, you can install and run `pre-commit`, which will install the specified `clang-format` version automatically:
+This formats code using the `_clang-format` file found in the oneMath top-level directory. The version of `clang-format` is specified in [`.pre-commit-config.yaml`](https://github.com/uxlfoundation/oneMath/blob/develop/.pre-commit-config.yaml). Alternatively, you can install and run `pre-commit`, which will install the specified `clang-format` version automatically:
 ```sh
 python3 -m venv <venv-name>
 source <venv-name>/bin/activate
 pip install pre-commit
-cd <path-to-onemkl>
+cd <path-to-onemath>
 pre-commit run --all-files
 deactivate
 ```
@@ -177,43 +177,43 @@ files in the following order:
 ### NS: Namespaces
 * **NS1:** Use snake_case: all lowercase, with underscores "_" between words for all namespaces.
 
-* **NS2:** The name of a top-level namespace must be the name of the project (oneMKL).
+* **NS2:** The name of a top-level namespace must be the name of the project (oneMath).
 
 * **NS3:** Do not indent content inside a namespace scope.
 
 ```c
 // Wrong! Do not indent
 namespace oneapi {
-namespace mkl {
+namespace math {
 
    class table { };
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 // Right
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 class table { };
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 ```
 
 *  **NS4:** Put each namespace on its own line when declaring nested namespaces.
 
 ```c
-#include "oneapi/mkl/blas/path_to_some_header.hpp"
+#include "oneapi/math/blas/path_to_some_header.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 
 /* ... */
 
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 ```
 
@@ -351,6 +351,6 @@ for (int i = 0; i < loop_size; i++) ...;
 
 ## Unit Tests
 
-oneMKL uses GoogleTest for functional testing. For more information about how to build and run Unit Tests please see [Building and Running Tests](https://oneapi-src.github.io/oneMKL/building_and_running_tests.html).
+oneMath uses GoogleTest for functional testing. For more information about how to build and run Unit Tests please see [Building and Running Tests](https://uxlfoundation.github.io/oneMath/building_and_running_tests.html).
 
 Be sure to extend the existing tests when fixing an issue, adding a new interface or new implementation under existing interfaces.
diff --git a/LICENSE b/LICENSE
index 580630b67..203f8b0dc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -203,7 +203,7 @@
    limitations under the License.
    
 This distribution includes third party software ("third party programs"). This 
-third party software, even if included with the distribution of the oneMKL open 
+third party software, even if included with the distribution of the oneMath open 
 source project, may be governed by separate license terms, including without 
 limitation, third party license terms, and open source software license terms. 
 These separate license terms govern your use of the third party programs as set 
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
index 98b996522..1889851cd 100644
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -1,10 +1,10 @@
 # Introduction
 
-This document defines roles in oneMKL Interfaces project.
+This document defines roles in the oneMath project.
 
 # Roles and responsibilities
 
-oneMKL Interfaces project defines three main roles:
+The oneMath project defines three main roles:
  * [Contributor](#contributor)
  * [Domain maintainer](#domain-maintainer)
  * [Architecture maintainer](#architecture-maintainer)
@@ -14,7 +14,7 @@ requirements and the nomination process.
 
 ## Contributor
 
-A Contributor invests time and resources to improve oneMKL Interfaces project.
+A Contributor invests time and resources to improve the oneMath project.
 Anyone can become a Contributor by bringing value in one of the following ways:
   * Answer questions from community members.
   * Submit feedback to design proposals.
@@ -72,12 +72,12 @@ The process of becoming a Domain maintainer is:
 
 | GitHub team name | Domain maintainers |
 :-----------|:------------|
-| @oneapi-src/onemkl-blas-write | oneMKL Interfaces BLAS maintainers |
-| @oneapi-src/onemkl-dft-write | oneMKL Interfaces DFT maintainers |
-| @oneapi-src/onemkl-lapack-write) | oneMKL Interfaces LAPACK maintainers |
-| @oneapi-src/onemkl-rng-write | oneMKL Interfaces RNG maintainers |
-| @oneapi-src/onemkl-sparse-write | oneMKL Interfaces Sparse Algebra maintainers |
-| @oneapi-src/onemkl-vm-write | oneMKL Interfaces Vector Math maintainers |
+| @uxlfoundation/onemath-blas-write | oneMath BLAS maintainers |
+| @uxlfoundation/onemath-dft-write | oneMath DFT maintainers |
+| @uxlfoundation/onemath-lapack-write | oneMath LAPACK maintainers |
+| @uxlfoundation/onemath-rng-write | oneMath RNG maintainers |
+| @uxlfoundation/onemath-sparse-write | oneMath Sparse Algebra maintainers |
+| @uxlfoundation/onemath-vm-write | oneMath Vector Math maintainers |
 
 ## Architecture Maintainer
 Architecture maintainers are the most established contributors who are responsible for the
@@ -107,7 +107,7 @@ Privileges:
   * Can recommend Contributor or Domain maintainer to become Architecture maintainers.
 
 Process of becoming a maintainer:
-1. A Contributor or Domain maintainer requests to join oneMKL Interfaces Architecture maintainers GitHub team
-(@oneapi-src/onemkl-arch-write).
+1. A Contributor or Domain maintainer requests to join the oneMath Architecture maintainers GitHub team
+(@uxlfoundation/onemath-arch-write).
 2. At least one of Architecture maintainers approves the request.
 
diff --git a/README.md b/README.md
index 04c14baf4..775994faf 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,25 @@
 <img src="https://github.com/uxlfoundation/artwork/blob/main/foundation/uxl-foundation-logo-horizontal-color.png" alt="UXL Foundation Logo" width="250"/>
 
-# oneAPI Math Kernel Library (oneMKL) Interfaces
+# oneAPI Math Library (oneMath)
 
-oneMKL Interfaces is an open-source implementation of the oneMKL Data Parallel C++ (DPC++) interface according to the [oneMKL specification](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/). It works with multiple devices (backends) using device-specific libraries underneath.
+oneMath is an open-source implementation of the [oneMath specification](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/). It can work with multiple devices using multiple libraries (backends) underneath. The oneMath project was previously referred to as oneMKL Interfaces.
 
-oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
+oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
 <br/><br/>
 
 <table>
     <thead>
         <tr align="center" >
             <th>User Application</th>
-            <th>oneMKL Layer</th>
+            <th>oneMath Layer</th>
             <th>Third-Party Library</th>
             <th>Hardware Backend</th>
         </tr>
     </thead>
     <tbody>
         <tr>
-            <td rowspan=13 align="center">oneMKL interface</td>
-            <td rowspan=13 align="center">oneMKL selector</td>
+            <td rowspan=13 align="center">oneMath</td>
+            <td rowspan=13 align="center">oneMath selector</td>
             <td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
             <td align="center">x86 CPU, Intel GPU</td>
         </tr>
@@ -91,14 +91,14 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
 
 #### Host API
 
-There are two oneMKL selector layer implementations:
+There are two oneMath selector layer implementations:
 
-- **Run-time dispatching**: The application is linked with the oneMKL library and the required backend is loaded at run-time based on device vendor (all libraries should be dynamic).
+- **Run-time dispatching**: The application is linked with the oneMath library and the required backend is loaded at run-time based on device vendor (all libraries should be dynamic).
 
   Example of app.cpp with run-time dispatching:
   
   ```cpp
-  #include "oneapi/mkl.hpp"
+  #include "oneapi/math.hpp"
   
   ...
   cpu_dev = sycl::device(sycl::cpu_selector());
@@ -107,24 +107,24 @@ There are two oneMKL selector layer implementations:
   sycl::queue cpu_queue(cpu_dev);
   sycl::queue gpu_queue(gpu_dev);
   
-  oneapi::mkl::blas::column_major::gemm(cpu_queue, transA, transB, m, ...);
-  oneapi::mkl::blas::column_major::gemm(gpu_queue, transA, transB, m, ...);
+  oneapi::math::blas::column_major::gemm(cpu_queue, transA, transB, m, ...);
+  oneapi::math::blas::column_major::gemm(gpu_queue, transA, transB, m, ...);
   ```
   How to build an application with run-time dispatching:
   
   if OS is Linux, use icpx compiler. If OS is Windows, use icx compiler.
   Linux example:
   ```cmd
-  $> icpx -fsycl –I$ONEMKL/include app.cpp
-  $> icpx -fsycl app.o –L$ONEMKL/lib –lonemkl
+  $> icpx -fsycl -I$ONEMATH/include app.cpp
+  $> icpx -fsycl app.o -L$ONEMATH/lib -lonemath
   ```
 
-- **Compile-time dispatching**: The application uses a templated backend selector API where the template parameters specify the required backends and third-party libraries and the application is linked with the required oneMKL backend wrapper libraries (libraries can be static or dynamic).
+- **Compile-time dispatching**: The application uses a templated backend selector API where the template parameters specify the required backends and third-party libraries and the application is linked with the required oneMath backend wrapper libraries (libraries can be static or dynamic).
 
   Example of app.cpp with compile-time dispatching:
   
   ```cpp
-  #include "oneapi/mkl.hpp"
+  #include "oneapi/math.hpp"
   
   ...
   cpu_dev = sycl::device(sycl::cpu_selector());
@@ -133,25 +133,25 @@ There are two oneMKL selector layer implementations:
   sycl::queue cpu_queue(cpu_dev);
   sycl::queue gpu_queue(gpu_dev);
   
-  oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu> cpu_selector(cpu_queue);
+  oneapi::math::backend_selector<oneapi::math::backend::mklcpu> cpu_selector(cpu_queue);
   
-  oneapi::mkl::blas::column_major::gemm(cpu_selector, transA, transB, m, ...);
-  oneapi::mkl::blas::column_major::gemm(oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas> {gpu_queue}, transA, transB, m, ...);
+  oneapi::math::blas::column_major::gemm(cpu_selector, transA, transB, m, ...);
+  oneapi::math::blas::column_major::gemm(oneapi::math::backend_selector<oneapi::math::backend::cublas> {gpu_queue}, transA, transB, m, ...);
   ```
   How to build an application with compile-time dispatching:
   
   ```cmd
-  $> clang++ -fsycl –I$ONEMKL/include app.cpp
-  $> clang++ -fsycl app.o –L$ONEMKL/lib –lonemkl_blas_mklcpu –lonemkl_blas_cublas
+  $> clang++ -fsycl -I$ONEMATH/include app.cpp
+  $> clang++ -fsycl app.o -L$ONEMATH/lib -lonemath_blas_mklcpu -lonemath_blas_cublas
   ```
   
-*Refer to [Selecting a Compiler](https://oneapi-src.github.io/oneMKL/selecting_a_compiler.html) for the choice between `icpx/icx` and `clang++` compilers.*
+*Refer to [Selecting a Compiler](https://uxlfoundation.github.io/oneMath/selecting_a_compiler.html) for the choice between `icpx/icx` and `clang++` compilers.*
 
 #### Device API
 
-Header-based and backend-independent Device API can be called within ```sycl kernel``` or work from Host code ([device-rng-usage-model-example](https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/rng/device_api/device-rng-usage-model.html#id2)). Currently, the following domains support the Device API:
+Header-based and backend-independent Device API can be called within ```sycl kernel``` or work from Host code ([device-rng-usage-model-example](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/rng/device_api/device-rng-usage-model.html#id2)). Currently, the following domains support the Device API:
 
-- **RNG**. To use RNG Device API functionality it's required to include ```oneapi/mkl/rng/device.hpp``` header file.
+- **RNG**. To use RNG Device API functionality it's required to include ```oneapi/math/rng/device.hpp``` header file.
 
 ### Supported Configurations:
 
@@ -544,22 +544,22 @@ Product | Supported Version | License
 ---
 
 ## Documentation
-- [Contents](https://oneapi-src.github.io/oneMKL/)
-- [About](https://oneapi-src.github.io/oneMKL/introduction.html)
+- [Contents](https://uxlfoundation.github.io/oneMath/)
+- [About](https://uxlfoundation.github.io/oneMath/introduction.html)
 - Get Started
-  - [Selecting a Compiler](https://oneapi-src.github.io/oneMKL/selecting_a_compiler.html)
-  - [Building the Project with DPC++](https://oneapi-src.github.io/oneMKL/building_the_project_with_dpcpp.html)
-  - [Building the Project with AdaptiveCpp](https://oneapi-src.github.io/oneMKL/building_the_project_with_adaptivecpp.html)
+  - [Selecting a Compiler](https://uxlfoundation.github.io/oneMath/selecting_a_compiler.html)
+  - [Building the Project with DPC++](https://uxlfoundation.github.io/oneMath/building_the_project_with_dpcpp.html)
+  - [Building the Project with AdaptiveCpp](https://uxlfoundation.github.io/oneMath/building_the_project_with_adaptivecpp.html)
 - Developer Reference
-  - [oneMKL Defined Datatypes](https://oneapi-src.github.io/oneMKL/onemkl-datatypes.html)
-  - [Dense Linear Algebra](https://oneapi-src.github.io/oneMKL/domains/dense_linear_algebra.html)
-- [Integrating a Third-Party Library](https://oneapi-src.github.io/oneMKL/create_new_backend.html)
+  - [oneMath Defined Datatypes](https://uxlfoundation.github.io/oneMath/onemath-datatypes.html)
+  - [Dense Linear Algebra](https://uxlfoundation.github.io/oneMath/domains/dense_linear_algebra.html)
+- [Integrating a Third-Party Library](https://uxlfoundation.github.io/oneMath/create_new_backend.html)
 
 ---
 
 ## Governance
 
-The oneMKL Interfaces project is governed by the UXL Foundation and you can get involved in this project in multiple ways. It is possible to join the [Math Special Interest Group (SIG)](https://github.com/uxlfoundation/foundation/tree/main/math) meetings where the group discusses and demonstrates work using this project. Members can also join the Open Source and Specification Working Group meetings.
+The oneMath project is governed by the UXL Foundation and you can get involved in this project in multiple ways. It is possible to join the [Math Special Interest Group (SIG)](https://github.com/uxlfoundation/foundation/tree/main/math) meetings where the group discusses and demonstrates work using this project. Members can also join the Open Source and Specification Working Group meetings.
 
 You can also join the mailing lists for the [UXL Foundation](https://lists.uxlfoundation.org/g/main/subgroups) to be informed of when meetings are happening and receive the latest information and discussions.
 
@@ -567,7 +567,7 @@ You can also join the mailing lists for the [UXL Foundation](https://lists.uxlfo
 
 ## Contributing
 
-You can contribute to this project and also contribute to [the specification for this project](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/). Please read the [CONTRIBUTING](CONTRIBUTING.md) page for more information. You can also contact oneMKL developers and maintainers via [UXL Foundation Slack](https://slack-invite.uxlfoundation.org/) using [#onemkl](https://uxlfoundation.slack.com/archives/onemkl) channel.
+You can contribute to this project and also contribute to [the specification for this project](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/). Please read the [CONTRIBUTING](CONTRIBUTING.md) page for more information. You can also contact oneMath developers and maintainers via [UXL Foundation Slack](https://slack-invite.uxlfoundation.org/) using the [#onemath](https://uxlfoundation.slack.com/archives/onemath) channel.
 
 ---
 
@@ -579,31 +579,31 @@ Distributed under the Apache license 2.0. See [LICENSE](LICENSE) for more inform
 
 ## FAQs
 
-### oneMKL
+### oneMath
 
-**Q: What is the difference between the following oneMKL items?**
-   - The [oneAPI Specification for oneMKL](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/)
-   - The [oneAPI Math Kernel Library (oneMKL) Interfaces](https://github.com/oneapi-src/oneMKL) Project
+**Q: What is the difference between the following items?**
+   - The [oneAPI Specification for oneMath](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/)
+   - The [oneAPI Math Library (oneMath)](https://github.com/uxlfoundation/oneMath) project
    - The [Intel(R) oneAPI Math Kernel Library (oneMKL)](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html) Product
 
 **A:**
-- The [oneAPI Specification for oneMKL](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/) defines the DPC++ interfaces for performance math library functions. The oneMKL specification can evolve faster and more frequently than implementations of the specification.
+- The [oneAPI Specification for oneMath](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/) defines the SYCL interfaces for performance math library functions. The oneMath specification can evolve faster and more frequently than implementations of the specification.
 
-- The [oneAPI Math Kernel Library (oneMKL) Interfaces](https://github.com/oneapi-src/oneMKL) Project is an open source implementation of the specification. The project goal is to demonstrate how the DPC++ interfaces documented in the oneMKL specification can be implemented for any math library and work for any target hardware. While the implementation provided here may not yet be the full implementation of the specification, the goal is to build it out over time. We encourage the community to contribute to this project and help to extend support to multiple hardware targets and other math libraries.
+- The [oneAPI Math Library (oneMath)](https://github.com/uxlfoundation/oneMath) project is an open source implementation of the specification. The project goal is to demonstrate how the SYCL interfaces documented in the oneMath specification can be implemented for any math library and work for any target hardware. While the implementation provided here may not yet be the full implementation of the specification, the goal is to build it out over time. We encourage the community to contribute to this project and help to extend support to multiple hardware targets and other math libraries.
 
-- The [Intel(R) oneAPI Math Kernel Library (oneMKL)](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html) product is the Intel product implementation of the specification (with DPC++ interfaces) as well as similar functionality with C and Fortran interfaces, and is provided as part of Intel® oneAPI Base Toolkit. It is highly optimized for Intel CPU and Intel GPU hardware.
+- The [Intel(R) oneAPI Math Kernel Library (oneMKL)](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html) product is provided by Intel as part of the Intel(R) oneAPI Base Toolkit. It is used for the Intel backends of oneMath. Its C++ API is very similar to the oneMath specification. It is highly optimized for Intel CPU and Intel GPU hardware.
 
-**Q: I'm trying to use oneMKL Interfaces in my project using `FetchContent`**, but I keep running into `ONEMKL::SYCL::SYCL target was not found` problem when I try to build the project. What should I do?
+**Q: I'm trying to use oneMath in my project using `FetchContent`**, but I keep running into `ONEMATH::SYCL::SYCL target was not found` problem when I try to build the project. What should I do?
 
 **A:**
 Make sure you set the compiler when you configure your project.
 E.g. `cmake -Bbuild . -DCMAKE_CXX_COMPILER=icpx`.
 
-**Q: I'm trying to use oneMKL Interfaces in my project using `find_package(oneMKL)`.** I set oneMKL/oneTBB and Compiler environment first, then I built and installed oneMKL Interfaces, and finally I tried to build my project using installed oneMKL Interfaces (e.g. like this `cmake -Bbuild -GNinja -DCMAKE_CXX_COMPILER=icpx -DoneMKL_ROOT=<path_to_installed_oneMKL_interfaces> .`) and I noticed that cmake includes installed oneMKL Interfaces headers as a system include which ends up as a lower priority than the installed oneMKL package includes which I set before for building oneMKL Interfaces. As a result, I get conflicts between oneMKL and installed oneMKL Interfaces headers. What should I do?
+**Q: I'm trying to use oneMath in my project using `find_package(oneMath)`.** I set the Intel(R) oneAPI Math Kernel Library/oneTBB and compiler environment first, then I built and installed oneMath, and finally I tried to build my project using installed oneMath (e.g. like this `cmake -Bbuild -GNinja -DCMAKE_CXX_COMPILER=icpx -DoneMath_ROOT=<path_to_installed_oneMath> .`) and I noticed that cmake includes installed oneMath headers as a system include which ends up as a lower priority than the installed Intel(R) oneAPI Math Kernel Library package includes which I set before for building oneMath. As a result, I get conflicts between Intel(R) oneAPI Math Kernel Library and installed oneMath headers. What should I do?
 
 **A:**
-Having installed oneMKL Interfaces headers as `-I` instead on system includes (as `-isystem`) helps to resolve this problem. We use `INTERFACE_INCLUDE_DIRECTORIES` to add paths to installed oneMKL Interfaces headers (check `oneMKLTargets.cmake` in `lib/cmake` to find it). It's a known limitation that `INTERFACE_INCLUDE_DIRECTORIES` puts headers paths as system headers. To avoid that:
-- Option 1: Use CMake >=3.25. In this case oneMKL Interfaces will be built with `EXPORT_NO_SYSTEM` property set to `true` and you won't see the issue.
+Having installed oneMath headers as `-I` instead of system includes (as `-isystem`) helps to resolve this problem. We use `INTERFACE_INCLUDE_DIRECTORIES` to add paths to installed oneMath headers (check `oneMathTargets.cmake` in `lib/cmake` to find it). It's a known limitation that `INTERFACE_INCLUDE_DIRECTORIES` puts headers paths as system headers. To avoid that:
+- Option 1: Use CMake >=3.25. In this case oneMath will be built with `EXPORT_NO_SYSTEM` property set to `true` and you won't see the issue.
 - Option 2: If you use CMake < 3.25, set `PROPERTIES NO_SYSTEM_FROM_IMPORTED true` for your target. E.g: `set_target_properties(test PROPERTIES NO_SYSTEM_FROM_IMPORTED true)`.
 
 ---
diff --git a/SECURITY.md b/SECURITY.md
index 480361d12..7c8af680f 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,7 +2,7 @@
 As an open-source project, we understand the importance of and responsibility
 for security. This Security policy outlines our guidelines and procedures for
 ensuring the highest level of Security and trust for our users who consume
-oneMKL Interfaces.
+oneMath.
 
 ## Supported Versions
 We provide support for the [latest version][1] only.
@@ -27,22 +27,22 @@ Along with the report, please include the following info:
   please provide details.
 
 ### When Should I Report a Vulnerability?
-* You think you discovered a potential security vulnerability in oneMKL Interfaces.
-* You are unsure how the potential vulnerability affects oneMKL Interfaces.
+* You think you discovered a potential security vulnerability in oneMath.
+* You are unsure how the potential vulnerability affects oneMath.
 * You think you discovered a vulnerability in another project or 3rd party
-component on which oneMKL Interfaces depends. If the issue is not fixed in the 3rd party
+component on which oneMath depends. If the issue is not fixed in the 3rd party
 component, try to report directly there first.
 
 ### When Should I NOT Report a Vulnerability?
 * You got an automated scan hit and are unable to provide details.
-* You need help using oneMKL Interfaces for security.
+* You need help using oneMath for security.
 * You need help applying security-related updates.
 * Your issue is not security-related.
 
 ## Security Reports Review Process
 Our goal is to respond quickly to your inquiry, and to coordinate a fix and
 disclosure with you. All confirmed security vulnerabilities will be addressed
-according to severity level and impact on oneMKL Interfaces. Normally, security issues
+according to severity level and impact on oneMath. Normally, security issues
 are fixed in the next planned release.
 
 ## Disclosure Policy
@@ -60,6 +60,6 @@ If you have any suggestions on how this Policy could be improved, please submit
 an issue or a pull request to this repository. Please **do not** report
 potential vulnerabilities or security flaws via a pull request.
 
-[1]: https://github.com/oneapi-src/oneMKL/releases/latest
-[2]: https://github.com/oneapi-src/oneMKL/security/advisories/new
-[3]: https://github.com/oneapi-src/oneMKL/security/advisories
\ No newline at end of file
+[1]: https://github.com/uxlfoundation/oneMath/releases/latest
+[2]: https://github.com/uxlfoundation/oneMath/security/advisories/new
+[3]: https://github.com/uxlfoundation/oneMath/security/advisories
\ No newline at end of file
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index df7d2fc4c..b564414df 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -21,7 +21,7 @@ install(FILES FindCompiler.cmake
         DESTINATION "lib/cmake/${PROJECT_NAME}"
 )
 if(ENABLE_MKLGPU_BACKEND OR ENABLE_MKLCPU_BACKEND)
-  install(FILES mkl/MKLConfig.cmake
+  install(FILES math/MKLConfig.cmake
         DESTINATION "lib/cmake/${PROJECT_NAME}"
   )
 endif()
diff --git a/cmake/FindCompiler.cmake b/cmake/FindCompiler.cmake
index 8aefc2623..3467f23d9 100644
--- a/cmake/FindCompiler.cmake
+++ b/cmake/FindCompiler.cmake
@@ -30,7 +30,7 @@ if(is_dpcpp)
     message(FATAL_ERROR "SYCL library is not found in ${SYCL_BINARY_DIR}/../lib, PATH, and LIBRARY_PATH")
   endif()
 
-  add_library(ONEMKL::SYCL::SYCL INTERFACE IMPORTED)
+  add_library(ONEMATH::SYCL::SYCL INTERFACE IMPORTED)
   if(UNIX)
     set(UNIX_INTERFACE_COMPILE_OPTIONS -fsycl)
     set(UNIX_INTERFACE_LINK_OPTIONS -fsycl)
@@ -53,18 +53,18 @@ if(is_dpcpp)
     endif()
     if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
 	    OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
-      set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
+      set_target_properties(ONEMATH::SYCL::SYCL PROPERTIES
         INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
         INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"
         INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
     else()
-      set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
+      set_target_properties(ONEMATH::SYCL::SYCL PROPERTIES
         INTERFACE_COMPILE_OPTIONS "-fsycl"
         INTERFACE_LINK_OPTIONS "-fsycl"
         INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
     endif()
   else()
-    set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
+    set_target_properties(ONEMATH::SYCL::SYCL PROPERTIES
       INTERFACE_COMPILE_OPTIONS "-fsycl"
       INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
   endif()
diff --git a/cmake/FindNETLIB.cmake b/cmake/FindNETLIB.cmake
index a066f4043..788e92ec4 100644
--- a/cmake/FindNETLIB.cmake
+++ b/cmake/FindNETLIB.cmake
@@ -36,6 +36,6 @@ list(APPEND NETLIB_LINK ${NETLIB_BLAS_LIBRARY})
 
 find_package_handle_standard_args(NETLIB REQUIRED_VARS NETLIB_INCLUDE NETLIB_LINK)
 
-add_library(ONEMKL::NETLIB::NETLIB UNKNOWN IMPORTED)
-set_target_properties(ONEMKL::NETLIB::NETLIB PROPERTIES IMPORTED_LOCATION ${NETLIB_CBLAS_LIBRARY})
+add_library(ONEMATH::NETLIB::NETLIB UNKNOWN IMPORTED)
+set_target_properties(ONEMATH::NETLIB::NETLIB PROPERTIES IMPORTED_LOCATION ${NETLIB_CBLAS_LIBRARY})
 
diff --git a/cmake/FindcuBLAS.cmake b/cmake/FindcuBLAS.cmake
index 496226580..028e1bd41 100644
--- a/cmake/FindcuBLAS.cmake
+++ b/cmake/FindcuBLAS.cmake
@@ -36,8 +36,8 @@ find_package(Threads REQUIRED)
 include(FindPackageHandleStandardArgs)
 
 
-if(NOT TARGET ONEMKL::cuBLAS::cuBLAS)
-  add_library(ONEMKL::cuBLAS::cuBLAS SHARED IMPORTED)
+if(NOT TARGET ONEMATH::cuBLAS::cuBLAS)
+  add_library(ONEMATH::cuBLAS::cuBLAS SHARED IMPORTED)
   if(USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     find_package_handle_standard_args(cuBLAS
         REQUIRED_VARS
@@ -47,7 +47,7 @@ if(NOT TARGET ONEMKL::cuBLAS::cuBLAS)
           CUDA_CUDART_LIBRARY
 	  CUDA_CUDA_LIBRARY
     )
-    set_target_properties(ONEMKL::cuBLAS::cuBLAS PROPERTIES
+    set_target_properties(ONEMATH::cuBLAS::cuBLAS PROPERTIES
         IMPORTED_LOCATION ${CUDA_cublas_LIBRARY}
         INTERFACE_INCLUDE_DIRECTORIES "${CUDA_TOOLKIT_INCLUDE}"
         INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_LIBRARIES};${CUDA_CUDART_LIBRARY};${CUDA_CUDA_LIBRARY}"
@@ -61,7 +61,7 @@ if(NOT TARGET ONEMKL::cuBLAS::cuBLAS)
           CUDA_CUDA_LIBRARY
           OPENCL_INCLUDE_DIR
     )
-    set_target_properties(ONEMKL::cuBLAS::cuBLAS PROPERTIES
+    set_target_properties(ONEMATH::cuBLAS::cuBLAS PROPERTIES
         IMPORTED_LOCATION ${CUDA_cublas_LIBRARY}
         INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_INCLUDE_DIR};${CUDA_TOOLKIT_INCLUDE}"
         INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_CUDA_LIBRARY};${CUDA_LIBRARIES}"
diff --git a/cmake/FindcuRAND.cmake b/cmake/FindcuRAND.cmake
index c2df693aa..850e7975e 100644
--- a/cmake/FindcuRAND.cmake
+++ b/cmake/FindcuRAND.cmake
@@ -59,7 +59,7 @@
 find_package(CUDA 10.0 REQUIRED)
 get_filename_component(SYCL_BINARY_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
 
-if (NOT (ONEMKL_SYCL_IMPLEMENTATION STREQUAL "hipsycl"))
+if (NOT (ONEMATH_SYCL_IMPLEMENTATION STREQUAL "hipsycl"))
 # the OpenCL include file from cuda is opencl 1.1 and it is not compatible with DPC++
 # the OpenCL include headers 1.2 onward is required. This is used to bypass NVIDIA OpenCL headers
 find_path(OPENCL_INCLUDE_DIR CL/cl.h OpenCL/cl.h 
@@ -77,7 +77,7 @@ find_package(Threads REQUIRED)
 
 include(FindPackageHandleStandardArgs)
 
-if (ONEMKL_SYCL_IMPLEMENTATION STREQUAL "hipsycl")
+if (ONEMATH_SYCL_IMPLEMENTATION STREQUAL "hipsycl")
 find_package_handle_standard_args(cuRAND
     REQUIRED_VARS
 	CUDA_TOOLKIT_INCLUDE
@@ -86,9 +86,9 @@ find_package_handle_standard_args(cuRAND
         CUDA_CUDA_LIBRARY
 )
 
-  if(NOT TARGET ONEMKL::cuRAND::cuRAND)
-  add_library(ONEMKL::cuRAND::cuRAND SHARED IMPORTED)
-  set_target_properties(ONEMKL::cuRAND::cuRAND PROPERTIES
+  if(NOT TARGET ONEMATH::cuRAND::cuRAND)
+  add_library(ONEMATH::cuRAND::cuRAND SHARED IMPORTED)
+  set_target_properties(ONEMATH::cuRAND::cuRAND PROPERTIES
     IMPORTED_LOCATION ${CUDA_curand_LIBRARY}
     INTERFACE_INCLUDE_DIRECTORIES "${CUDA_TOOLKIT_INCLUDE}"
     INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_CUDA_LIBRARY};${CUDA_LIBRARIES}"
@@ -104,9 +104,9 @@ find_package_handle_standard_args(cuRAND
         OPENCL_INCLUDE_DIR
 )
 
-  if(NOT TARGET ONEMKL::cuRAND::cuRAND)
-  add_library(ONEMKL::cuRAND::cuRAND SHARED IMPORTED)
-  set_target_properties(ONEMKL::cuRAND::cuRAND PROPERTIES
+  if(NOT TARGET ONEMATH::cuRAND::cuRAND)
+  add_library(ONEMATH::cuRAND::cuRAND SHARED IMPORTED)
+  set_target_properties(ONEMATH::cuRAND::cuRAND PROPERTIES
     IMPORTED_LOCATION ${CUDA_curand_LIBRARY}
     INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_INCLUDE_DIR};${CUDA_TOOLKIT_INCLUDE}"
     INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_CUDA_LIBRARY};${CUDA_LIBRARIES}"
diff --git a/cmake/FindcuSOLVER.cmake b/cmake/FindcuSOLVER.cmake
index eafcf96aa..ea4ffc1a6 100644
--- a/cmake/FindcuSOLVER.cmake
+++ b/cmake/FindcuSOLVER.cmake
@@ -41,9 +41,9 @@ find_package_handle_standard_args(cuSOLVER
         CUDA_CUDA_LIBRARY
         OPENCL_INCLUDE_DIR
 )
-if(NOT TARGET ONEMKL::cuSOLVER::cuSOLVER)
-  add_library(ONEMKL::cuSOLVER::cuSOLVER SHARED IMPORTED)
-  set_target_properties(ONEMKL::cuSOLVER::cuSOLVER PROPERTIES
+if(NOT TARGET ONEMATH::cuSOLVER::cuSOLVER)
+  add_library(ONEMATH::cuSOLVER::cuSOLVER SHARED IMPORTED)
+  set_target_properties(ONEMATH::cuSOLVER::cuSOLVER PROPERTIES
       IMPORTED_LOCATION ${CUDA_cusolver_LIBRARY}
       INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_INCLUDE_DIR};${CUDA_TOOLKIT_INCLUDE}"
       INTERFACE_LINK_LIBRARIES "Threads::Threads;${CUDA_CUDA_LIBRARY};${CUDA_LIBRARIES}"
diff --git a/cmake/WarningsUtils.cmake b/cmake/WarningsUtils.cmake
index 3b5f76afb..14eb274f5 100644
--- a/cmake/WarningsUtils.cmake
+++ b/cmake/WarningsUtils.cmake
@@ -19,15 +19,15 @@
 
 include_guard(GLOBAL)
 
-add_library(onemkl_warnings INTERFACE)
+add_library(onemath_warnings INTERFACE)
 
-set(ONEMKL_WARNINGS "")
+set(ONEMATH_WARNINGS "")
 
 include(CheckCXXCompilerFlag)
 macro(add_warning flag)
   check_cxx_compiler_flag(${flag} IS_SUPPORTED)
   if(${IS_SUPPORTED})
-    list(APPEND ONEMKL_WARNINGS ${flag})
+    list(APPEND ONEMATH_WARNINGS ${flag})
   else()
     message(WARNING "Compiler does not support ${flag}")
   endif()
@@ -39,10 +39,10 @@ add_warning("-Wshadow")
 add_warning("-Wconversion")
 add_warning("-Wpedantic")
 
-message(VERBOSE "Domains with warnings enabled use: ${ONEMKL_WARNINGS}")
+message(VERBOSE "Domains with warnings enabled use: ${ONEMATH_WARNINGS}")
 
-# The onemkl_warnings target can be linked to any other target to enable warnings.
-target_compile_options(onemkl_warnings INTERFACE ${ONEMKL_WARNINGS})
+# The onemath_warnings target can be linked to any other target to enable warnings.
+target_compile_options(onemath_warnings INTERFACE ${ONEMATH_WARNINGS})
 
 # Add the library to install package
-install(TARGETS onemkl_warnings EXPORT oneMKLTargets)
+install(TARGETS onemath_warnings EXPORT oneMathTargets)
diff --git a/cmake/oneMKLConfig.cmake b/cmake/oneMathConfig.cmake
similarity index 92%
rename from cmake/oneMKLConfig.cmake
rename to cmake/oneMathConfig.cmake
index 5baf9024b..bb261da47 100644
--- a/cmake/oneMKLConfig.cmake
+++ b/cmake/oneMathConfig.cmake
@@ -20,11 +20,10 @@
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
 include(CMakeFindDependencyMacro)
 
-#find_dependency(MKL REQUIRED)
 # try to search for SYCLConfig first to find compiler. If it's not present, use local FindCompiler.cmake
 find_package(SYCL QUIET)
 if(NOT ${SYCL_FOUND})
   find_package(Compiler REQUIRED)
 endif()
 
-include("${CMAKE_CURRENT_LIST_DIR}/oneMKLTargets.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/oneMathTargets.cmake")
diff --git a/docs/README.md b/docs/README.md
index 040d22aac..2ef6e5a73 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
-### oneMKL documentation
+### oneMath documentation
 
-This folder contains oneMKL documentation in reStructuredText (rST) format.
+This folder contains oneMath documentation in reStructuredText (rST) format.
 
 The documentation build step is skipped by default.
 To enable building documentation from the main build, set `-DBUILD_DOC=ON`.
@@ -8,9 +8,9 @@ For more information see [Building with CMake](../README.md#building-with-cmake)
 
 To build documentation only, use the following commands from the current folder:
 ```bash
-# Inside <path to onemkl>/docs
+# Inside <path to onemath>/docs
 mkdir build && cd build
 cmake ..
 cmake --build .
 ```
-Generated documentation can be found in `<path to onemkl>/docs/build/Documentation`
+Generated documentation can be found in `<path to onemath>/docs/build/Documentation`
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html
index cae88b2cc..d6145c75e 100644
--- a/docs/_templates/layout.html
+++ b/docs/_templates/layout.html
@@ -5,7 +5,7 @@
   var wapLocalCode = 'us-en'; // Dynamically set per localized site; see mapping table for values
   var wapSection = "oneapi-mkl"; // WAP team will give you a unique section for your site
   // Load TMS
-  if (document.location.href.includes("oneapi-src.github.io")) {
+  if (document.location.href.includes("uxlfoundation.github.io")) {
     (function () {
       var url = 'https://www.intel.com/content/dam/www/global/wap/tms-loader.js'; // WAP file URL
       var po = document.createElement('script'); po.type = 'text/javascript'; po.async = true; po.src = url;
diff --git a/docs/building_and_running_tests.rst b/docs/building_and_running_tests.rst
index 4cf5c9187..da7ff30fd 100644
--- a/docs/building_and_running_tests.rst
+++ b/docs/building_and_running_tests.rst
@@ -28,7 +28,7 @@ following:
   cmake -DCMAKE_INSTALL_PREFIX=~/lapack -DCBLAS=True -DLAPACK=True -DLAPACKE=True -DBUILD_INDEX64=False -DBUILD_SHARED_LIBS=True ..
   cmake --build . -j --target install
 
-and then used in oneMKL by setting ``-REF_BLAS_ROOT=/path/to/lapack/install``
+and then used in oneMath by setting ``-DREF_BLAS_ROOT=/path/to/lapack/install``
 and ``-DREF_LAPACK_ROOT=/path/to/lapack/install``.
 
 You can re-run tests without re-building the entire project.
diff --git a/docs/building_the_project_with_adaptivecpp.rst b/docs/building_the_project_with_adaptivecpp.rst
index 98c763b90..086fb3a86 100644
--- a/docs/building_the_project_with_adaptivecpp.rst
+++ b/docs/building_the_project_with_adaptivecpp.rst
@@ -16,26 +16,26 @@ Environment Setup
 
 #. 
    Clone this project. The root directory of the cloned repository will be
-   referred to as ``<path to onemkl>``.
+   referred to as ``<path to onemath>``.
 
 #. 
    Download and install the `required dependencies
-   <https://github.com/oneapi-src/oneMKL?tab=readme-ov-file#software-requirements>`_
+   <https://github.com/uxlfoundation/oneMath?tab=readme-ov-file#software-requirements>`_
    manually.
 
 Build Commands
 ###############
 
-In most cases, building oneMKL Interfaces is as simple as setting the compiler and
+In most cases, building oneMath is as simple as setting the compiler and
 selecting the desired backends to build with.
 
 On Linux (other OSes are not supported with the AdaptiveCpp compiler):
 
 .. code-block:: bash
 
-  # Inside <path to onemkl>
+  # Inside <path to onemath>
   mkdir build && cd build
-  cmake .. -DONEMKL_SYCL_IMPLEMENTATION=hipsycl    \ # Indicate that AdaptiveCpp is being used.
+  cmake .. -DONEMATH_SYCL_IMPLEMENTATION=hipsycl   \ # Indicate that AdaptiveCpp is being used.
           -DENABLE_MKLGPU_BACKEND=False            \ # MKLGPU backend is not supported by AdaptiveCpp
           -DENABLE_<BACKEND_NAME>_BACKEND=True     \ # Enable backend(s) (optional)
           -DENABLE_<BACKEND_NAME_2>_BACKEND=True   \ # Multiple backends can be enabled at once.
@@ -48,9 +48,9 @@ On Linux (other OSes are not supported with the AdaptiveCpp compiler):
 Backends should be enabled by setting ``-DENABLE_<BACKEND_NAME>_BACKEND=True`` for
 each desired backend. By default, the ``MKLGPU`` and ``MKLCPU`` backends are
 enabled, but ``MKLGPU`` must be disabled with AdaptiveCpp. The supported
-backends for the compilers are given in the table at `oneMKL supported
+backends for the compilers are given in the table at `oneMath supported
 configurations table
-<https://github.com/oneapi-src/oneMKL?tab=readme-ov-file#supported-configurations>`_,
+<https://github.com/uxlfoundation/oneMath?tab=readme-ov-file#supported-configurations>`_,
 and the CMake option names are given in the table below. Some backends may
 require additional parameters to be set. See the relevant section below for
 additional guidance. The target architectures must be specified with
diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst
index efe92f285..49fd51abb 100644
--- a/docs/building_the_project_with_dpcpp.rst
+++ b/docs/building_the_project_with_dpcpp.rst
@@ -3,15 +3,14 @@
 Building the Project with DPC++
 ===============================
 
-This page describes building the oneMKL Interfaces with either the Intel(R)
-oneAPI DPC++ Compiler or open-source oneAPI DPC++ Compiler. For guidance on
-building the project with AdaptiveCpp, see
-:ref:`building_the_project_with_adaptivecpp`.
+This page describes building oneMath with either the Intel(R) oneAPI DPC++
+Compiler or open-source oneAPI DPC++ Compiler. For guidance on building the
+project with AdaptiveCpp, see :ref:`building_the_project_with_adaptivecpp`.
 
 .. _build_setup_with_dpcpp:
 
 Environment Setup
-##################
+#################
 
 #. 
    Install the required DPC++ compiler (Intel(R) DPC++ or Open DPC++ - see
@@ -19,16 +18,16 @@ Environment Setup
 
 #. 
    Clone this project. The root directory of the cloned repository will be
-   referred to as ``<path to onemkl>``.
+   referred to as ``<path to onemath>``.
 
 #. 
    Build and install all `required dependencies
-   <https://github.com/oneapi-src/oneMKL?tab=readme-ov-file#software-requirements>`_. 
+   <https://github.com/uxlfoundation/oneMath?tab=readme-ov-file#software-requirements>`_.
 
 .. _build_introduction_with_dpcpp:
 
 Build Commands
-###############
+##############
 
 The build commands for various compilers and backends differ mostly in setting
 the values of CMake options for compiler and backend. In this section, we
@@ -40,7 +39,7 @@ for Windows`_ for building on Windows):
 
 .. code-block:: bash
 
-  # Inside <path to onemkl>
+  # Inside <path to onemath>
   mkdir build && cd build
   cmake .. -DCMAKE_CXX_COMPILER=$CXX_COMPILER    \ # Should be icpx or clang++
           -DCMAKE_C_COMPILER=$C_COMPILER         \ # Should be icx or clang
@@ -57,13 +56,13 @@ In the above, the ``$CXX_COMPILER`` and ``$C_COMPILER`` should be set to
 ``icpx`` and ``icx`` respectively when using the Intel(R) oneAPI DPC++ Compiler,
 or ``clang++`` and ``clang`` respectively when using the Open DPC++ Compiler. 
 
-Backends should be enabled by setting ``-DENABLE_<BACKEND_NAME>_BACKEND=True`` for
-each desired backend. By default, only the ``MKLGPU`` and ``MKLCPU`` backends
-are enabled. Multiple backends for multiple device vendors can be enabled at
-once (albeit with limitations when using portBLAS and portFFT). The supported
-backends for the compilers are given in the table at `oneMKL supported
-configurations table
-<https://github.com/oneapi-src/oneMKL?tab=readme-ov-file#supported-configurations>`_,
+Backends should be enabled by setting ``-DENABLE_<BACKEND_NAME>_BACKEND=True``
+for each desired backend. By default, only the ``MKLGPU`` and ``MKLCPU``
+backends are enabled. Multiple backends for multiple device vendors can be
+enabled at once (albeit with limitations when using portBLAS and portFFT). The
+supported backends for the compilers are given in the table at `oneMath
+supported configurations table
+<https://github.com/uxlfoundation/oneMath?tab=readme-ov-file#supported-configurations>`_,
 and the CMake option names are given in the table below. Some backends may
 require additional parameters to be set. See the relevant section below for
 additional guidance.
@@ -148,13 +147,13 @@ Some additional build options are given in the section `Additional build options
 TARGET_DOMAINS
 ^^^^^^^^^^^^^^
 
-oneMKL supports multiple domains: BLAS, DFT, LAPACK, RNG and sparse BLAS. The
-domains built by oneMKL can be selected using the ``TARGET_DOMAINS`` parameter.
+oneMath supports multiple domains: BLAS, DFT, LAPACK, RNG and sparse BLAS. The
+domains built by oneMath can be selected using the ``TARGET_DOMAINS`` parameter.
 In most cases, ``TARGET_DOMAINS`` is set automatically according to the domains
 supported by the backend libraries enabled. However, while most backend
 libraries support only one of these domains, some may support multiple. For
 example, the ``MKLCPU`` backend supports every domain. To enable support for
-only the BLAS domain in the oneMKL Interfaces whilst compiling with ``MKLCPU``,
+only the BLAS domain in oneMath whilst compiling with ``MKLCPU``,
 ``TARGET_DOMAINS`` could be set to ``blas``. To enable BLAS and DFT,
 ``-DTARGET_DOMAINS="blas dft"`` would be used.
 
@@ -162,12 +161,12 @@ only the BLAS domain in the oneMKL Interfaces whilst compiling with ``MKLCPU``,
 Backends
 #########
 
-.. _build_for_intel_onemkl_dpcpp:
+.. _build_for_intel_onemath_dpcpp:
 
 Building for Intel(R) oneMKL
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The Intel(R) oneMKL backend supports multiple domains on both x86 CPUs and Intel
+The Intel(R) oneMKL backends support multiple domains on both x86 CPUs and Intel
 GPUs. The MKLCPU backend using Intel(R) oneMKL for x86 CPU is enabled by
 default, and controlled with the parameter ``ENABLE_MKLCPU_BACKEND``. The MKLGPU
 backend using Intel(R) oneMKL for Intel GPU is enabled by default, and
@@ -234,8 +233,8 @@ Building for other SYCL devices
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 SYCL enables portable heterogeneous computing on a wide range of accelerators.
-Consequently, it is possible to use oneMKL Interfaces with accelerators not
-anticipated by the oneMKL Interfaces team.
+Consequently, it is possible to use oneMath with accelerators not anticipated by
+the project.
 
 For generic SYCL devices, only the portBLAS and portFFT backends are enabled.
 The user must set the appropriate ``-fsycl-targets`` for their device, and also
@@ -246,14 +245,14 @@ unsupported configurations.
 .. _build_for_portlibs_dpcpp:
 
 Pure SYCL backends: portBLAS and portFFT
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 `portBLAS <https://github.com/codeplaysoftware/portBLAS>`_ and `portFFT
 <https://github.com/codeplaysoftware/portFFT>`_ are experimental pure-SYCL
 backends that work on all SYCL targets supported by the DPC++ compiler. Since
 they support multiple targets, they cannot be enabled with other backends in the
-same domain, or the MKLCPU or MKLGPU backends. Both libraries are experimental
-and currently only support a subset of operations and features.
+same domain, or the ``MKLCPU`` or ``MKLGPU`` backends. Both libraries are
+experimental and currently only support a subset of operations and features.
 
 For best performance, both libraries must be tuned. See the individual sections
 for more details.
@@ -315,10 +314,10 @@ specified. See `DPC++ User Manual
 .. _build_additional_options_dpcpp:
 
 Additional Build Options
-##########################
+########################
 
-When building oneMKL the SYCL implementation can be specified by setting the
-``ONEMKL_SYCL_IMPLEMENTATION`` option. Possible values are:
+When building oneMath the SYCL implementation can be specified by setting the
+``ONEMATH_SYCL_IMPLEMENTATION`` option. Possible values are:
 
 * ``dpc++`` (default) for the `Intel(R) oneAPI DPC++ Compiler
   <https://software.intel.com/en-us/oneapi/dpc-compiler>`_ and for the `oneAPI
@@ -351,20 +350,20 @@ The following table provides details of CMake options and their default values:
 
 .. note::
   When building with ``BUILD_FUNCTIONAL_TESTS=True`` (default option) only a single CUDA backend can be built
-  (`#270 <https://github.com/oneapi-src/oneMKL/issues/270>`_).
+  (`#270 <https://github.com/uxlfoundation/oneMath/issues/270>`_).
 
 
 .. _build_invocation_examples_dpcpp:
 
 CMake invocation examples
-##########################
+#########################
 
-Build oneMKL with support for Nvidia GPUs with tests
+Build oneMath with support for Nvidia GPUs with tests
 disabled using the Ninja build system:
 
 .. code-block:: bash
 
-  cmake $ONEMKL_DIR \
+  cmake $ONEMATH_DIR \
       -GNinja \
       -DCMAKE_CXX_COMPILER=clang++ \
       -DCMAKE_C_COMPILER=clang \
@@ -377,17 +376,17 @@ disabled using the Ninja build system:
       -DENABLE_CUSPARSE_BACKEND=True \
       -DBUILD_FUNCTIONAL_TESTS=False
 
-``$ONEMKL_DIR`` points at the oneMKL source directly. The x86 CPU (``MKLCPU``)
+``$ONEMATH_DIR`` points at the oneMath source directory. The x86 CPU (``MKLCPU``)
 and Intel GPU (``MKLGPU``) backends are enabled by default, but are disabled
-here. The backends for Nvidia GPUs must all be explicilty enabled. The tests are
+here. The backends for Nvidia GPUs must all be explicitly enabled. The tests are
 disabled, but the examples will still be built.
 
-Building oneMKL with support for AMD GPUs with tests
+Building oneMath with support for AMD GPUs with tests
 disabled:
 
 .. code-block:: bash
 
-  cmake $ONEMKL_DIR \
+  cmake $ONEMATH_DIR \
       -DCMAKE_CXX_COMPILER=clang++ \
       -DCMAKE_C_COMPILER=clang \
       -DENABLE_MKLCPU_BACKEND=False \
@@ -398,18 +397,18 @@ disabled:
       -DHIP_TARGETS=gfx90a \
       -DBUILD_FUNCTIONAL_TESTS=False
 
-``$ONEMKL_DIR`` points at the oneMKL source directly. The x86 CPU (``MKLCPU``)
+``$ONEMATH_DIR`` points at the oneMath source directory. The x86 CPU (``MKLCPU``)
 and Intel GPU (``MKLGPU``) backends are enabled by default, but are disabled
-here. The backends for AMD GPUs must all be explicilty enabled. The tests are
+here. The backends for AMD GPUs must all be explicitly enabled. The tests are
 disabled, but the examples will still be built.
 
 
-Build oneMKL for the DFT domain only with support for x86 CPU, Intel GPU, AMD
+Build oneMath for the DFT domain only with support for x86 CPU, Intel GPU, AMD
 GPU and Nvidia GPU with testing enabled:
 
 .. code-block:: bash
 
-  cmake $ONEMKL_DIR \ 
+  cmake $ONEMATH_DIR \
       -DCMAKE_CXX_COMPILER=icpx \
       -DCMAKE_C_COMPILER=icx \
       -DENABLE_ROCFFT_BACKEND=True \
@@ -427,35 +426,35 @@ set, the backend libraries to enable the use of BLAS, LAPACK and RNG with MKLGPU
 and MKLCPU would also be enabled. The build of examples is disabled. Since
 functional testing was not disabled, tests would be built.
 
-Build oneMKL for the BLAS domain on a generic SYCL device:
+Build oneMath for the BLAS domain on a generic SYCL device:
 
 .. code-block:: bash
 
-  cmake $ONEMKL_DIR \ 
+  cmake $ONEMATH_DIR \
       -DCMAKE_CXX_COMPILER=clang++ \
       -DCMAKE_C_COMPILER=clang \
       -DENABLE_MKLCPU_BACKEND=False \
       -DENABLE_MKLGPU_BACKEND=False \
       -DENABLE_PORTBLAS_BACKEND=True
 
-Note that this is not a tested configuration. This builds oneMKL Interfaces
-with the portBLAS backend only, for a generic SYCL device supported by the 
-Open DPC++ project.
+Note that this is not a tested configuration. This builds oneMath with the
+portBLAS backend only, for a generic SYCL device supported by the Open DPC++
+project.
 
-Build oneMKL for the DFT domain on a generic SYCL device:
+Build oneMath for the DFT domain on a generic SYCL device:
 
 .. code-block:: bash
 
-  cmake $ONEMKL_DIR \
+  cmake $ONEMATH_DIR \
       -DCMAKE_CXX_COMPILER=clang++ \
       -DCMAKE_C_COMPILER=clang \
       -DENABLE_MKLCPU_BACKEND=False \
       -DENABLE_MKLGPU_BACKEND=False \
       -DENABLE_PORTFFT_BACKEND=True
 
-Note that this is not a tested configuration. This builds oneMKL Interfaces
-with the portFFT backend only, for a generic SYCL device supported by the
-Open DPC++ project.
+Note that this is not a tested configuration. This builds oneMath with the
+portFFT backend only, for a generic SYCL device supported by the Open DPC++
+project.
 
 .. _project_cleanup:
 
@@ -483,12 +482,12 @@ Building for Windows
 ####################
 
 The Windows build is similar to the Linux build, albeit that `fewer backends are
-supported <https://github.com/oneapi-src/oneMKL?tab=readme-ov-file#windows>`_.
+supported <https://github.com/uxlfoundation/oneMath?tab=readme-ov-file#windows>`_.
 Additionally, the Ninja build system must be used. For example:
 
 .. code-block:: bash
 
-  # Inside <path to onemkl>
+  # Inside <path to onemath>
   md build && cd build
   cmake .. -G Ninja [-DCMAKE_CXX_COMPILER=<path_to_icx_compiler>\bin\icx] # required only if icx is not found in environment variable PATH
                     [-DCMAKE_C_COMPILER=<path_to_icx_compiler>\bin\icx]   # required only if icx is not found in environment variable PATH
@@ -505,14 +504,14 @@ Build FAQ
 #########
 
 clangrt builtins lib not found
-  Encountered when trying to build oneMKL with some ROCm libraries. There are
+  Encountered when trying to build oneMath with some ROCm libraries. There are
   several possible solutions:
 
   * If building Open DPC++ from source, add ``compiler-rt`` to the external
     projects compile option: ``--llvm-external-projects compiler-rt``.
   * Manually set the variable ``HIP_CXX_COMPILER`` to HIP's toolkit ``clang++``
     path, for instance ``-DHIP_CXX_COMPILER=/opt/rocm/6.1.0/llvm/bin/clang++``.
-    oneMKL may fail to link if the clang versions of ``icpx`` and ``rocm`` are
+    oneMath may fail to link if the clang versions of ``icpx`` and ``rocm`` are
     not compatible.
 
 Could NOT find CBLAS (missing: CBLAS file)
diff --git a/docs/conf.py.in b/docs/conf.py.in
index d874dbab7..6167f8f7f 100644
--- a/docs/conf.py.in
+++ b/docs/conf.py.in
@@ -19,7 +19,7 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'oneAPI Math Kernel Library Interfaces'
+project = 'oneMath'
 copyright = '2020-2022, Intel Corporation'
 author = 'Intel Corporation'
 
@@ -90,7 +90,7 @@ html_favicon = f'{static_dir}/favicons.png'
 
 # Theme options
 html_theme_options = {
-'repository_url': 'https://github.com/oneapi-src/oneMKL',
+'repository_url': 'https://github.com/uxlfoundation/oneMath',
 'path_to_docs': 'docs',
 'use_issues_button': True,
 'use_edit_page_button': True,
diff --git a/docs/create_new_backend.rst b/docs/create_new_backend.rst
index b22df014d..374debc3b 100644
--- a/docs/create_new_backend.rst
+++ b/docs/create_new_backend.rst
@@ -3,22 +3,22 @@
 
 .. _create_backend_wrappers:
 
-Integrating a Third-Party Library to oneAPI Math Kernel Library (oneMKL) Interfaces
-====================================================================================
+Integrating a Third-Party Library to oneAPI Math Library (oneMath)
+==================================================================
 
-This step-by-step tutorial provides examples for enabling new third-party libraries in oneMKL.
+This step-by-step tutorial provides examples for enabling new third-party libraries in oneMath.
 
-oneMKL has a header-based implementation of the interface layer (``include`` directory) and a source-based implementation of the backend layer for each third-party library (``src`` directory). To enable a third-party library, you must update both parts of oneMKL and integrate the new third-party library to the oneMKL build and test systems.
+oneMath has a header-based implementation of the interface layer (``include`` directory) and a source-based implementation of the backend layer for each third-party library (``src`` directory). To enable a third-party library, you must update both parts of oneMath and integrate the new third-party library to the oneMath build and test systems.
 
 For the new backend library and header naming please use the following template:
 
 .. code-block::
 
-    onemkl_<domain>_<3rd-party library short name>[<wrapper for specific target>]
+    onemath_<domain>_<3rd-party library short name>[<wrapper for specific target>]
 
-Where ``<wrapper for specific target>`` is required only if multiple wrappers are provided from the same 3rd-party library, e.g., wrappers with Intel oneMKL C API for CPU target ``onemkl_blas_mklcpu.so`` and wrappers with Intel oneMKL DPC++ API for GPU target ``onemkl_blas_mklgpu.so``.
+Where ``<wrapper for specific target>`` is required only if multiple wrappers are provided from the same 3rd-party library, e.g., wrappers with Intel oneMKL C API for CPU target ``onemath_blas_mklcpu.so`` and wrappers with Intel oneMKL DPC++ API for GPU target ``onemath_blas_mklgpu.so``.
 
-If there is no need for multiple wrappers only ``<domain>`` and ``<3rd-party library short name>`` are required, e.g. ``onemkl_rng_curand.so``
+If there is no need for multiple wrappers only ``<domain>`` and ``<3rd-party library short name>`` are required, e.g. ``onemath_rng_curand.so``
 
 `1. Create Header Files`_
 
@@ -38,22 +38,22 @@ If there is no need for multiple wrappers only ``<domain>`` and ``<3rd-party lib
 For each new backend library, you should create the following two header files:
 
 * Header file with a declaration of entry points to the new third-party library wrappers
-* Compiler-time dispatching interface (see `oneMKL Usage Models <../README.md#supported-usage-models>`_) for new third-party libraries
+* Compile-time dispatching interface (see `oneMath Usage Models <../README.md#supported-usage-models>`_) for new third-party libraries
 
-**Header File Example**: command to generate the header file with a declaration of BLAS entry points in the oneapi::mkl::newlib namespace 
+**Header File Example**: command to generate the header file with a declaration of BLAS entry points in the oneapi::math::newlib namespace 
 
 .. code-block:: bash
 
-    python scripts/generate_backend_api.py include/oneapi/mkl/blas.hpp \                                  # Base header file
-                                           include/oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp \ # Output header file
-                                           oneapi::mkl::newlib                                            # Wrappers namespace
+    python scripts/generate_backend_api.py include/oneapi/math/blas.hpp \                                  # Base header file
+                                           include/oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp \ # Output header file
+                                           oneapi::math::newlib                                            # Wrappers namespace
 
-Code snippet of the generated header file ``include/oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp``
+Code snippet of the generated header file ``include/oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp``
 
 .. code-block:: cpp
 
     namespace oneapi {
-    namespace mkl {
+    namespace math {
     namespace newlib {
     
     void asum(sycl::queue &queue, std::int64_t n, sycl::buffer<float, 1> &x, std::int64_t incx,
@@ -65,19 +65,19 @@ Code snippet of the generated header file ``include/oneapi/mkl/blas/detail/newli
 
 .. code-block:: bash
 
-    python scripts/generate_ct_instant.py   include/oneapi/mkl/blas/detail/blas_ct_templates.hpp \         # Base header file
-                                            include/oneapi/mkl/blas/detail/newlib/blas_ct.hpp \            # Output header file
-                                            include/oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp \ # Header file with declaration of entry points to wrappers
+    python scripts/generate_ct_instant.py   include/oneapi/math/blas/detail/blas_ct_templates.hpp \         # Base header file
+                                            include/oneapi/math/blas/detail/newlib/blas_ct.hpp \            # Output header file
+                                            include/oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp \ # Header file with declaration of entry points to wrappers
                                             newlib \                                                       # Library name
                                             newdevice \                                                    # Backend name
-                                            oneapi::mkl::newlib                                            # Wrappers namespace
+                                            oneapi::math::newlib                                            # Wrappers namespace
 
-Code snippet of the generated header file ``include/oneapi/mkl/blas/detail/newlib/blas_ct.hpp``
+Code snippet of the generated header file ``include/oneapi/math/blas/detail/newlib/blas_ct.hpp``
 
 .. code-block:: cpp
 
     namespace oneapi {
-    namespace mkl {
+    namespace math {
     namespace blas {
     
     template <>
@@ -85,7 +85,7 @@ Code snippet of the generated header file ``include/oneapi/mkl/blas/detail/newli
                                                    sycl::buffer<float, 1> &x, std::int64_t incx,
                                                    sycl::buffer<float, 1> &result) {
         asum_precondition(queue, n, x, incx, result);
-        oneapi::mkl::newlib::asum(queue, n, x, incx, result);
+        oneapi::math::newlib::asum(queue, n, x, incx, result);
         asum_postcondition(queue, n, x, incx, result);
     }
 
@@ -95,39 +95,39 @@ Code snippet of the generated header file ``include/oneapi/mkl/blas/detail/newli
 2. Integrate Header Files
 -------------------------
 
-Below you can see structure of oneMKL top-level include directory:
+Below you can see the structure of the oneMath top-level include directory:
 
 ::
 
     include/
         oneapi/
-            mkl/
-                mkl.hpp -> oneMKL spec APIs
-                types.hpp  -> oneMKL spec types
-                blas.hpp   -> oneMKL BLAS APIs w/ pre-check/dispatching/post-check
+            math/
+                math.hpp -> oneMath spec APIs
+                types.hpp  -> oneMath spec types
+                blas.hpp   -> oneMath BLAS APIs w/ pre-check/dispatching/post-check
                 detail/    -> implementation specific header files
-                    exceptions.hpp        -> oneMKL exception classes
-                    backends.hpp          -> list of oneMKL backends
+                    exceptions.hpp        -> oneMath exception classes
+                    backends.hpp          -> list of oneMath backends
                     backends_table.hpp    -> table of backend libraries for each domain and device
                     get_device_id.hpp     -> function to query device information from queue for Run-time dispatching
                 blas/
-                    predicates.hpp -> oneMKL BLAS pre-check post-check
+                    predicates.hpp -> oneMath BLAS pre-check post-check
                     detail/        -> BLAS domain specific implementation details
-                        blas_loader.hpp       -> oneMKL Run-time BLAS API
-                        blas_ct_templates.hpp -> oneMKL Compile-time BLAS API general templates
+                        blas_loader.hpp       -> oneMath Run-time BLAS API
+                        blas_ct_templates.hpp -> oneMath Compile-time BLAS API general templates
                         cublas/
-                            blas_ct.hpp            -> oneMKL Compile-time BLAS API template instantiations for <cublas>
-                            onemkl_blas_cublas.hpp -> backend wrappers library API
+                            blas_ct.hpp            -> oneMath Compile-time BLAS API template instantiations for <cublas>
+                            onemath_blas_cublas.hpp -> backend wrappers library API
                         mklcpu/
-                            blas_ct.hpp            -> oneMKL Compile-time BLAS API template instantiations for <mklcpu>
-                            onemkl_blas_mklcpu.hpp -> backend wrappers library API
+                            blas_ct.hpp            -> oneMath Compile-time BLAS API template instantiations for <mklcpu>
+                            onemath_blas_mklcpu.hpp -> backend wrappers library API
                         <other backends>/
                 <other domains>/
 
 
-To integrate the new third-party library to a oneMKL header-based part, following files from this structure should be updated:
+To integrate the new third-party library into the oneMath header-based part, the following files from this structure should be updated:
 
-* ``include/oneapi/mkl/detail/backends.hpp``: add the new backend
+* ``include/oneapi/math/detail/backends.hpp``: add the new backend
 
   **Example**: add the ``newbackend`` backend
 
@@ -142,7 +142,7 @@ To integrate the new third-party library to a oneMKL header-based part, followin
         static backendmap backend_map = { { backend::mklcpu, "mklcpu" },
      +                                    { backend::newbackend, "newbackend" },
 
-* ``include/oneapi/mkl/detail/backends_table.hpp``: add new backend library for supported domain(s) and device(s)
+* ``include/oneapi/math/detail/backends_table.hpp``: add new backend library for supported domain(s) and device(s)
 
   **Example**: enable ``newlib`` for ``blas`` domain and ``newdevice`` device
 
@@ -157,37 +157,37 @@ To integrate the new third-party library to a oneMKL header-based part, followin
             { domain::blas,
               { { device::x86cpu,
                   {
-        #ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+        #ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
                       LIB_NAME("blas_mklcpu")
         #endif
                    } },
      +          { device::newdevice,
      +            {
-     +  #ifdef ONEMKL_ENABLE_NEWLIB_BACKEND
+     +  #ifdef ONEMATH_ENABLE_NEWLIB_BACKEND
      +                 LIB_NAME("blas_newlib")
      +  #endif
      +             } },
 
-* ``include/oneapi/mkl/detail/get_device_id.hpp``: add new device detection mechanism for Run-time dispatching
+* ``include/oneapi/math/detail/get_device_id.hpp``: add new device detection mechanism for Run-time dispatching
 
   **Example**: enable ``newdevice`` if the queue is targeted for the Host
 
   .. code-block:: diff
     
-        inline oneapi::mkl::device get_device_id(sycl::queue &queue) {
-            oneapi::mkl::device device_id;
+        inline oneapi::math::device get_device_id(sycl::queue &queue) {
+            oneapi::math::device device_id;
      +      if (queue.is_host())
      +          device_id=device::newdevice;
 
-* ``include/oneapi/mkl/blas.hpp``: include the generated header file for the compile-time dispatching interface (see `oneMKL Usage Models <../README.md#supported-usage-models>`_)
+* ``include/oneapi/math/blas.hpp``: include the generated header file for the compile-time dispatching interface (see `oneMath Usage Models <../README.md#supported-usage-models>`_)
 
-  **Example**: add ``include/oneapi/mkl/blas/detail/newlib/blas_ct.hpp`` generated at the `1. Create Header Files`_ step
+  **Example**: add ``include/oneapi/math/blas/detail/newlib/blas_ct.hpp`` generated at the `1. Create Header Files`_ step
     
   .. code-block:: diff
     
-        #include "oneapi/mkl/blas/detail/mklcpu/blas_ct.hpp"
-        #include "oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp"
-     +  #include "oneapi/mkl/blas/detail/newlib/blas_ct.hpp"
+        #include "oneapi/math/blas/detail/mklcpu/blas_ct.hpp"
+        #include "oneapi/math/blas/detail/mklgpu/blas_ct.hpp"
+     +  #include "oneapi/math/blas/detail/newlib/blas_ct.hpp"
 
 
 The new files generated at the `1. Create Header Files`_ step result in the following updated structure of the BLAS domain header files.
@@ -196,22 +196,22 @@ The new files generated at the `1. Create Header Files`_ step result in the foll
 
     include/
         oneapi/
-            mkl/
-                blas.hpp -> oneMKL BLAS APIs w/ pre-check/dispatching/post-check
+            math/
+                blas.hpp -> oneMath BLAS APIs w/ pre-check/dispatching/post-check
                 blas/
-                    predicates.hpp -> oneMKL BLAS pre-check post-check
+                    predicates.hpp -> oneMath BLAS pre-check post-check
                     detail/        -> BLAS domain specific implementation details
-                        blas_loader.hpp       -> oneMKL Run-time BLAS API
-                        blas_ct_templates.hpp -> oneMKL Compile-time BLAS API general templates
+                        blas_loader.hpp       -> oneMath Run-time BLAS API
+                        blas_ct_templates.hpp -> oneMath Compile-time BLAS API general templates
                         cublas/
-                            blas_ct.hpp            -> oneMKL Compile-time BLAS API template instantiations for <cublas>
-                            onemkl_blas_cublas.hpp -> backend wrappers library API
+                            blas_ct.hpp            -> oneMath Compile-time BLAS API template instantiations for <cublas>
+                            onemath_blas_cublas.hpp -> backend wrappers library API
                         mklcpu/
-                            blas_ct.hpp            -> oneMKL Compile-time BLAS API template instantiations for <mklcpu>
-                            onemkl_blas_mklcpu.hpp -> backend wrappers library API
+                            blas_ct.hpp            -> oneMath Compile-time BLAS API template instantiations for <mklcpu>
+                            onemath_blas_mklcpu.hpp -> backend wrappers library API
         +              newlib/
-        +                  blas_ct.hpp            -> oneMKL Compile-time BLAS API template instantiations for <newbackend>
-        +                  onemkl_blas_newlib.hpp -> backend wrappers library API
+        +                  blas_ct.hpp            -> oneMath Compile-time BLAS API template instantiations for <newlib>
+        +                  onemath_blas_newlib.hpp -> backend wrappers library API
                         <other backends>/
                 <other domains>/
 
@@ -219,7 +219,7 @@ The new files generated at the `1. Create Header Files`_ step result in the foll
 
 3. Create Wrappers
 ------------------
-Wrappers convert Data Parallel C++ (DPC++) input data types to third-party library data types and call corresponding implementation from the third-party library. Wrappers for each third-party library are built to separate oneMKL backend libraries. The ``libonemkl.so`` dispatcher library loads the wrappers at run-time if you are using the interface for run-time dispatching, or you will link with them directly in case you are using the interface for compile-time dispatching (for more information see `oneMKL Usage Models <../README.md#supported-usage-models>`_).
+Wrappers convert Data Parallel C++ (DPC++) input data types to third-party library data types and call corresponding implementation from the third-party library. Wrappers for each third-party library are built to separate oneMath backend libraries. The ``libonemath.so`` dispatcher library loads the wrappers at run-time if you are using the interface for run-time dispatching, or you will link with them directly in case you are using the interface for compile-time dispatching (for more information see `oneMath Usage Models <../README.md#supported-usage-models>`_).
 
 All wrappers and dispatcher library implementations are in the ``src`` directory:
 
@@ -248,12 +248,12 @@ You can modify wrappers generated with this script to enable third-party library
 
 The command below generates two new files:
 
-* ``src/blas/backends/newlib/newlib_wrappers.cpp`` - DPC++ wrappers for all functions from ``include/oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp``
+* ``src/blas/backends/newlib/newlib_wrappers.cpp`` - DPC++ wrappers for all functions from ``include/oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp``
 * ``src/blas/backends/newlib/newlib_wrappers_table_dyn.cpp`` - structure of symbols for run-time dispatcher (in the same location as wrappers), suffix ``_dyn`` indicates that this file is required for dynamic library only.
 
 .. code-block:: bash
 
-    python scripts/generate_wrappers.py include/oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp \ # Base header file
+    python scripts/generate_wrappers.py include/oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp \ # Base header file
                                         src/blas/function_table.hpp \                                  # Declaration for structure of symbols
                                         src/blas/backends/newlib/newlib_wrappers.cpp \                 # Output wrappers
                                         newlib                                                         # Library name
@@ -276,14 +276,14 @@ The following code snippet is updated for ``src/blas/backends/newlib/newlib_wrap
         #include <CL/sycl.hpp>
         #endif
         
-        #include "oneapi/mkl/types.hpp"
+        #include "oneapi/math/types.hpp"
         
-        #include "oneapi/mkl/blas/detail/newlib/onemkl_blas_newlib.hpp"
+        #include "oneapi/math/blas/detail/newlib/onemath_blas_newlib.hpp"
     +    
     +    #include "newlib.h"
         
         namespace oneapi {
-        namespace mkl {
+        namespace math {
         namespace newlib {
         
         void asum(sycl::queue &queue, std::int64_t n, sycl::buffer<float, 1> &x, std::int64_t incx,
@@ -327,7 +327,7 @@ Updated structure of the ``src`` folder with the ``newlib`` wrappers:
 
 4. Integrate Wrappers to the Build System
 -----------------------------------------
-Here is the list of files that should be created/updated to integrate the new wrappers for the third-party library to the oneMKL build system:
+Here is the list of files that should be created/updated to integrate the new wrappers for the third-party library into the oneMath build system:
 
 * Add the new option ``ENABLE_XXX_BACKEND`` for the new third-party library to the top of the ``CMakeList.txt`` file.
 
@@ -368,8 +368,8 @@ Here is the list of files that should be created/updated to integrate the new wr
         include(FindPackageHandleStandardArgs)
         find_package_handle_standard_args(NEWLIB REQUIRED_VARS NEWLIB_LIBRARY)
         # Set cmake target for the library
-        add_library(ONEMKL::NEWLIB::NEWLIB UNKNOWN IMPORTED)
-        set_target_properties(ONEMKL::NEWLIB::NEWLIB PROPERTIES
+        add_library(ONEMATH::NEWLIB::NEWLIB UNKNOWN IMPORTED)
+        set_target_properties(ONEMATH::NEWLIB::NEWLIB PROPERTIES
             IMPORTED_LOCATION ${NEWLIB_LIBRARY})
 
 * Create the ``src/<domain>/backends/<new_directory>/CMakeList.txt`` cmake config file to specify how to build the backend layer for the new third-party library.
@@ -397,9 +397,9 @@ Here is the list of files that should be created/updated to integrate the new wr
   .. code-block:: diff
 
             target_link_libraries(${LIB_OBJ}
-                PUBLIC ONEMKL::SYCL::SYCL
+                PUBLIC ONEMATH::SYCL::SYCL
         -       # Add third-party library to link with here
-        +       PUBLIC ONEMKL::NEWLIB::NEWLIB
+        +       PUBLIC ONEMATH::NEWLIB::NEWLIB
             )
 
 Now you can build the backend library for ``newlib`` to make sure the third-party library integration was completed successfully (for more information, see `Build with cmake <../README.md#building-with-cmake>`_)
@@ -427,8 +427,8 @@ Update the following files to enable the new third-party library for unit tests:
 
   .. code-block:: diff
     
-        #cmakedefine ONEMKL_ENABLE_MKLCPU_BACKEND
-     +  #cmakedefine ONEMKL_ENABLE_NEWLIB_BACKEND
+        #cmakedefine ONEMATH_ENABLE_MKLCPU_BACKEND
+     +  #cmakedefine ONEMATH_ENABLE_NEWLIB_BACKEND
 
 * ``tests/unit_tests/CMakeLists.txt``: add instructions about how to link tests with the new backend library
 
@@ -437,24 +437,24 @@ Update the following files to enable the new third-party library for unit tests:
   .. code-block:: diff
     
         if(ENABLE_MKLCPU_BACKEND)
-            add_dependencies(test_main_ct onemkl_blas_mklcpu)
+            add_dependencies(test_main_ct onemath_blas_mklcpu)
             if(BUILD_SHARED_LIBS)
-                list(APPEND ONEMKL_LIBRARIES onemkl_blas_mklcpu)
+                list(APPEND ONEMATH_LIBRARIES onemath_blas_mklcpu)
             else()
-                list(APPEND ONEMKL_LIBRARIES -foffload-static-lib=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libonemkl_blas_mklcpu.a)
+                list(APPEND ONEMATH_LIBRARIES -foffload-static-lib=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libonemath_blas_mklcpu.a)
                 find_package(MKL REQUIRED)
-                list(APPEND ONEMKL_LIBRARIES ${MKL_LINK_C})
+                list(APPEND ONEMATH_LIBRARIES ${MKL_LINK_C})
             endif()
         endif()
      +
      +    if(ENABLE_NEWLIB_BACKEND)
-     +       add_dependencies(test_main_ct onemkl_blas_newlib)
+     +       add_dependencies(test_main_ct onemath_blas_newlib)
      +       if(BUILD_SHARED_LIBS)
-     +           list(APPEND ONEMKL_LIBRARIES onemkl_blas_newlib)
+     +           list(APPEND ONEMATH_LIBRARIES onemath_blas_newlib)
      +       else()
-     +           list(APPEND ONEMKL_LIBRARIES -foffload-static-lib=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libonemkl_blas_newlib.a)
+     +           list(APPEND ONEMATH_LIBRARIES -foffload-static-lib=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libonemath_blas_newlib.a)
      +           find_package(NEWLIB REQUIRED)
-     +           list(APPEND ONEMKL_LIBRARIES ONEMKL::NEWLIB::NEWLIB)
+     +           list(APPEND ONEMATH_LIBRARIES ONEMATH::NEWLIB::NEWLIB)
      +       endif()
      +   endif()
 
@@ -464,16 +464,16 @@ Update the following files to enable the new third-party library for unit tests:
 
   .. code-block:: diff
     
-        #ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+        #ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
             #define TEST_RUN_INTELGPU(q, func, args) \
-                func<oneapi::mkl::backend::mklgpu> args
+                func<oneapi::math::backend::mklgpu> args
         #else
             #define TEST_RUN_INTELGPU(q, func, args)
         #endif
      +    
-     +  #ifdef ONEMKL_ENABLE_NEWLIB_BACKEND
+     +  #ifdef ONEMATH_ENABLE_NEWLIB_BACKEND
      +     #define TEST_RUN_NEWDEVICE(q, func, args) \
-     +         func<oneapi::mkl::backend::newbackend> args
+     +         func<oneapi::math::backend::newbackend> args
      +  #else
      +      #define TEST_RUN_NEWDEVICE(q, func, args)
      +  #endif
@@ -495,7 +495,7 @@ Update the following files to enable the new third-party library for unit tests:
                 }
             }
      +           
-     +  #ifdef ONEMKL_ENABLE_NEWLIB_BACKEND
+     +  #ifdef ONEMATH_ENABLE_NEWLIB_BACKEND
      +      devices.push_back(sycl::device(sycl::host_selector()));
      +  #endif
 
diff --git a/docs/domains/blas/asum.rst b/docs/domains/blas/asum.rst
deleted file mode 100644
index 1fc02c84c..000000000
--- a/docs/domains/blas/asum.rst
+++ /dev/null
@@ -1,158 +0,0 @@
-.. _onemkl_blas_asum:
-
-asum
-====
-
-Computes the sum of magnitudes of the vector elements.
-
-.. _onemkl_blas_asum_description:
-
-.. rubric:: Description
-
-The ``asum`` routine computes the sum of the magnitudes of elements of a
-real vector, or the sum of magnitudes of the real and imaginary parts
-of elements of a complex vector:
-
-.. math::
-
-   result = \sum_{i=1}^{n}(|Re(x_i)| + |Im(x_i)|) 
-   
-where ``x`` is a vector with ``n`` elements.
-
-``asum`` supports the following precisions for data:
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_res 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_asum_buffer:
-
-asum (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void asum(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T_res,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void asum(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T_res,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-   
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the scalar result is stored (the sum of magnitudes of
-      the real and imaginary parts of all elements of the vector).
-
-
-.. _onemkl_blas_asum_usm:
-
-asum (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event asum(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T_res *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event asum(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T_res *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to input vector ``x``. The array holding the vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to the output matrix where the scalar result is stored
-      (the sum of magnitudes of the real and imaginary parts of all
-      elements of the vector).
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/axpby.rst b/docs/domains/blas/axpby.rst
deleted file mode 100644
index f95247fd6..000000000
--- a/docs/domains/blas/axpby.rst
+++ /dev/null
@@ -1,180 +0,0 @@
-.. _onemkl_blas_axpby:
-
-axpby
-=====
-
-Computes a vector-scalar product added to a scaled-vector.
-
-.. _onemkl_blas_axpby_description:
-
-.. rubric:: Description
-
-The ``axpby`` routines compute two scalar-vector product and add them:
-
-.. math::
-
-      y \leftarrow beta * y + alpha * x
-
-where ``x`` and ``y`` are vectors of ``n`` elements and ``alpha`` and ``beta`` are scalars.
-
-``axpby`` supports the following precisions.
-
-   .. list-table::
-      :header-rows: 1
-
-      * -  T
-      * -  ``float``
-      * -  ``double``
-      * -  ``std::complex<float>``
-      * -  ``std::complex<double>``
-
-.. _onemkl_blas_axpby_buffer:
-
-axpby (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void axpby(sycl::queue &queue,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x, std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y, std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void axpby(sycl::queue &queue,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x, std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y, std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x`` and ``y``.
-
-   alpha
-      Specifies the scalar ``alpha``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride between two consecutive elements of the ``x`` vector.
-
-   beta
-      Specifies the scalar ``beta``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride between two consecutive elements of the ``y`` vector.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_axpby_usm:
-
-axpby (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event axpby(sycl::queue &queue,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x, std::int64_t incx,
-                        const T beta,
-                        T *y, std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event axpby(sycl::queue &queue,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x, std::int64_t incx,
-                        const T beta,
-                        T *y, std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x`` and ``y``.
-
-   alpha
-      Specifies the scalar alpha.
-
-   beta
-      Specifies the scalar beta.
-
-   x
-      Pointer to the input vector ``x``. The allocated memory must be
-      of size at least (1 + (``n`` – 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for more details.
-
-   incx
-      Stride between consecutive elements of the ``x`` vector.
-
-   y
-      Pointer to the input vector ``y``. The allocated memory must be
-      of size at least (1 + (``n`` – 1)*abs(``incy``)). See
-      :ref:`matrix-storage` for more details.
-
-   incy
-      Stride between consecutive elements of the ``y`` vector.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Array holding the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
-
diff --git a/docs/domains/blas/axpy.rst b/docs/domains/blas/axpy.rst
deleted file mode 100644
index a3f5a69e3..000000000
--- a/docs/domains/blas/axpy.rst
+++ /dev/null
@@ -1,184 +0,0 @@
-.. _onemkl_blas_axpy:
-
-axpy
-====
-
-Computes a vector-scalar product and adds the result to a vector.
-
-.. _onemkl_blas_axpy_description:
-      
-.. rubric:: Description
-
-The ``axpy`` routines compute a scalar-vector product and add the result
-to a vector:
-
-.. math::
-
-      y \leftarrow alpha * x + y
-
-where:
-
-``x`` and ``y`` are vectors of ``n`` elements,
-
-``alpha`` is a scalar.
-
-``axpy`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_axpy_buffer:
-
-axpy (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void axpy(sycl::queue &queue,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void axpy(sycl::queue &queue,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   alpha
-      Specifies the scalar alpha.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_axpy_usm:
-
-axpy (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event axpy(sycl::queue &queue,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event axpy(sycl::queue &queue,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   alpha
-      Specifies the scalar alpha.
-
-   x
-      Pointer to the input vector ``x``. The array holding the vector
-      ``x`` must be of size at least (1 + (``n`` – 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array holding the vector
-      ``y`` must be of size at least (1 + (``n`` – 1)*abs(``incy``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/axpy_batch.rst b/docs/domains/blas/axpy_batch.rst
deleted file mode 100644
index 308ed2b29..000000000
--- a/docs/domains/blas/axpy_batch.rst
+++ /dev/null
@@ -1,350 +0,0 @@
-.. _onemkl_blas_axpy_batch:
-
-axpy_batch
-==========
-
-Computes a group of ``axpy`` operations.
-
-.. _onemkl_blas_axpy_batch_description:
-
-.. rubric:: Description
-
-The ``axpy_batch`` routines are batched versions of :ref:`onemkl_blas_axpy`, performing
-multiple ``axpy`` operations in a single call. Each ``axpy`` 
-operation adds a scalar-vector product to a vector.
-   
-``axpy_batch`` supports the following precisions for data.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_axpy_batch_buffer:
-
-axpy_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``axpy_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-  
-   for i = 0 … batch_size – 1
-      X and Y are vectors at offset i * stridex, i * stridey in x and y
-      Y := alpha * X + Y
-   end for
-
-where:
-
-``alpha`` is scalar,
-
-``X`` and ``Y`` are vectors.
-   
-**Strided API**
-
-.. rubric:: Syntax
- 
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void axpy_batch(sycl::queue &queue,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,
-                       1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,
-                       1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void axpy_batch(sycl::queue &queue,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,
-                       1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,
-                       1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in ``X`` and ``Y``.
-
-   alpha
-       Specifies the scalar ``alpha``.
-
-   x
-      Buffer holding input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx 
-      Stride of vector ``X``.
-
-   stridex 
-      Stride between different ``X`` vectors.
-
-   y
-      Buffer holding input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy 
-      Stride of vector ``Y``.
-   
-   stridey 
-      Stride between different ``Y`` vectors.
-
-   batch_size 
-      Specifies the number of ``axpy`` operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output buffer, overwritten by ``batch_size`` ``axpy`` operations of the form 
-      ``alpha`` * ``X`` + ``Y``.
-
-
-.. _onemkl_blas_axpy_batch_usm:
-
-axpy_batch (USM Version)
-------------------------
-
-.. rubric:: Description
-
-The USM version of ``axpy_batch`` supports the group API and strided API. 
-
-The group API operation is defined as
-::
-   
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           X and Y are vectors in x[idx] and y[idx]
-           Y := alpha[i] * X + Y
-           idx := idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-   
-   for i = 0 … batch_size – 1
-      X and Y are vectors at offset i * stridex, i * stridey in x and y
-      Y := alpha * X + Y
-   end for
-
-where:
-
-``alpha`` is scalar,
-
-``X`` and ``Y`` are vectors.
-
-For group API, ``x`` and ``y`` arrays contain the pointers for all the input vectors. 
-The total number of vectors in ``x`` and ``y`` are given by:
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
-
-For strided API, ``x`` and ``y`` arrays contain all the input vectors. 
-The total number of vectors in ``x`` and ``y`` are given by the ``batch_size`` parameter.
-
-**Group API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event axpy_batch(sycl::queue &queue,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event axpy_batch(sycl::queue &queue,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the number of elements in vectors ``X`` and ``Y`` for every vector in group ``i``.
-
-   alpha
-       Array of ``group_count`` scalar elements. ``alpha[i]`` specifies the scaling factor for vector ``X`` in group ``i``.
-
-   x
-      Array of pointers to input vectors ``X`` with size ``total_batch_count``.
-      The size of array allocated for the ``X`` vector of the group ``i`` must be at least (1 + (``n[i]`` – 1)*abs(``incx[i]``)). 
-      See :ref:`matrix-storage` for more details.
-
-   incx
-      Array of ``group_count`` integers. ``incx[i]`` specifies the stride of vector ``X`` in group ``i``.
- 
-   y
-      Array of pointers to input/output vectors ``Y`` with size ``total_batch_count``.
-      The size of array allocated for the ``Y`` vector of the group ``i`` must be at least (1 + (``n[i]`` – 1)*abs(``incy[i]``)). 
-      See :ref:`matrix-storage` for more details.
-
-   incy
-      Array of ``group_count`` integers. ``incy[i]`` specifies the stride of vector ``Y`` in group ``i``.
-
-   group_count
-      Number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the number of ``axpy`` operations in group ``i``. 
-      Each element in ``group_size`` must be at least 0.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Array of pointers holding the ``Y`` vectors, overwritten by ``total_batch_count`` ``axpy`` operations of the form 
-      ``alpha`` * ``X`` + ``Y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event axpy_batch(sycl::queue &queue,
-                              std::int64_t n,
-                              T alpha,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event axpy_batch(sycl::queue &queue,
-                              std::int64_t n,
-                              T alpha,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in ``X`` and ``Y``.
-
-   alpha
-       Specifies the scalar ``alpha``.
-
-   x
-      Pointer to input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx 
-      Stride of vector ``X``.
-   
-   stridex 
-      Stride between different ``X`` vectors.
-
-   y
-      Pointer to input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy 
-      Stride of vector ``Y``.
-   
-   stridey 
-      Stride between different ``Y`` vectors.
-
-   batch_size 
-      Specifies the number of ``axpy`` operations to perform.
-  
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output vectors, overwritten by ``batch_size`` ``axpy`` operations of the form 
-      ``alpha`` * ``X`` + ``Y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:**:ref:`blas-like-extensions`
diff --git a/docs/domains/blas/blas-level-1-routines.rst b/docs/domains/blas/blas-level-1-routines.rst
deleted file mode 100644
index c96c2d54c..000000000
--- a/docs/domains/blas/blas-level-1-routines.rst
+++ /dev/null
@@ -1,76 +0,0 @@
-.. _blas-level-1-routines:
-
-BLAS Level 1 Routines
-=====================
-
-
-.. container::
-
-
-   BLAS Level 1 includes routines which perform
-   vector-vector operations as described in the following table. 
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Description     
-         * -     :ref:`onemkl_blas_asum`   
-           -     Sum of vector magnitudes      
-         * -     :ref:`onemkl_blas_axpy`   
-           -     Scalar-vector product      
-         * -     :ref:`onemkl_blas_copy`   
-           -     Copy vector      
-         * -     :ref:`onemkl_blas_dot`   
-           -     Dot product      
-         * -     :ref:`onemkl_blas_sdsdot`   
-           -     Dot product with double precision      
-         * -     :ref:`onemkl_blas_dotc`   
-           -     Dot product conjugated      
-         * -     :ref:`onemkl_blas_dotu`
-           -     Dot product unconjugated      
-         * -     :ref:`onemkl_blas_nrm2`   
-           -     Vector 2-norm (Euclidean norm)      
-         * -     :ref:`onemkl_blas_rot`
-           -     Plane rotation of points      
-         * -     :ref:`onemkl_blas_rotg`   
-           -     Generate Givens rotation of points      
-         * -     :ref:`onemkl_blas_rotm`   
-           -     Modified Givens plane rotation of points           
-         * -     :ref:`onemkl_blas_rotmg`  
-           -     Generate modified Givens plane rotation of points           
-         * -     :ref:`onemkl_blas_scal`
-           -     Vector-scalar product      
-         * -     :ref:`onemkl_blas_swap`   
-           -     Vector-vector swap      
-         * -     :ref:`onemkl_blas_iamax`   
-           -     Index of the maximum absolute value element of a vector     
-         * -     :ref:`onemkl_blas_iamin`   
-           -     Index of the minimum absolute value element of a vector     
-
-.. toctree::
-    :hidden:
-
-    asum
-    axpy
-    copy
-    dot
-    sdsdot
-    dotc
-    dotu
-    nrm2
-    rot
-    rotg
-    rotm
-    rotmg
-    scal
-    swap
-    iamax
-    iamin
-
-
-**Parent topic:** :ref:`onemkl_blas`
diff --git a/docs/domains/blas/blas-level-2-routines.rst b/docs/domains/blas/blas-level-2-routines.rst
deleted file mode 100644
index 427acbc9b..000000000
--- a/docs/domains/blas/blas-level-2-routines.rst
+++ /dev/null
@@ -1,105 +0,0 @@
-.. _blas-level-2-routines:
-
-BLAS Level 2 Routines
-=====================
-
-
-.. container::
-
-
-   BLAS Level 2 includes routines which perform
-   matrix-vector operations as described in the following table. 
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Description  
-         * -     :ref:`onemkl_blas_gbmv`   
-           -     Matrix-vector product using a general band matrix         
-         * -     :ref:`onemkl_blas_gemv`   
-           -     Matrix-vector product using a general matrix     
-         * -     :ref:`onemkl_blas_ger`   
-           -     Rank-1 update of a general matrix     
-         * -     :ref:`onemkl_blas_gerc`   
-           -     Rank-1 update of a conjugated general matrix     
-         * -     :ref:`onemkl_blas_geru`   
-           -     Rank-1 update of a general matrix, unconjugated          
-         * -     :ref:`onemkl_blas_hbmv`   
-           -     Matrix-vector product using a Hermitian band matrix          
-         * -     :ref:`onemkl_blas_hemv`
-           -     Matrix-vector product using a Hermitian matrix          
-         * -     :ref:`onemkl_blas_her`   
-           -     Rank-1 update of a Hermitian matrix     
-         * -     :ref:`onemkl_blas_her2`   
-           -     Rank-2 update of a Hermitian matrix     
-         * -     :ref:`onemkl_blas_hpmv`   
-           -     Matrix-vector product using a Hermitian packed matrix          
-         * -     :ref:`onemkl_blas_hpr`   
-           -     Rank-1 update of a Hermitian packed matrix     
-         * -     :ref:`onemkl_blas_hpr2`   
-           -     Rank-2 update of a Hermitian packed matrix     
-         * -     :ref:`onemkl_blas_sbmv`   
-           -     Matrix-vector product using a symmetric band matrix          
-         * -     :ref:`onemkl_blas_spmv`   
-           -     Matrix-vector product using a symmetric packed matrix          
-         * -     :ref:`onemkl_blas_spr`   
-           -     Rank-1 update of a symmetric packed matrix     
-         * -     :ref:`onemkl_blas_spr2`   
-           -     Rank-2 update of a symmetric packed matrix     
-         * -     :ref:`onemkl_blas_symv`   
-           -     Matrix-vector product using a symmetric matrix          
-         * -     :ref:`onemkl_blas_syr`   
-           -     Rank-1 update of a symmetric matrix     
-         * -     :ref:`onemkl_blas_syr2`   
-           -     Rank-2 update of a symmetric matrix     
-         * -     :ref:`onemkl_blas_tbmv`   
-           -     Matrix-vector product using a triangular band matrix          
-         * -     :ref:`onemkl_blas_tbsv`   
-           -     Solution of a linear system of equations with a triangular band matrix    
-         * -     :ref:`onemkl_blas_tpmv`   
-           -     Matrix-vector product using a triangular packed matrix          
-         * -     :ref:`onemkl_blas_tpsv`   
-           -     Solution of a linear system of equations with a triangular packed matrix    
-         * -     :ref:`onemkl_blas_trmv`   
-           -     Matrix-vector product using a triangular matrix          
-         * -     :ref:`onemkl_blas_trsv`   
-           -     Solution of a linear system of equations with a triangular matrix    
-
-
-
-
-.. toctree::
-    :hidden:
-
-    gbmv
-    gemv
-    ger
-    gerc
-    geru
-    hbmv
-    hemv
-    her
-    her2
-    hpmv
-    hpr
-    hpr2
-    sbmv
-    spmv
-    spr
-    spr2
-    symv
-    syr
-    syr2
-    tbmv
-    tbsv
-    tpmv
-    tpsv
-    trmv
-    trsv
-
-**Parent topic:** :ref:`onemkl_blas`
diff --git a/docs/domains/blas/blas-level-3-routines.rst b/docs/domains/blas/blas-level-3-routines.rst
deleted file mode 100644
index bb7f3f4d6..000000000
--- a/docs/domains/blas/blas-level-3-routines.rst
+++ /dev/null
@@ -1,55 +0,0 @@
-.. _blas-level-3-routines:
-
-BLAS Level 3 Routines
-=====================
-
-
-.. container::
-
-   BLAS Level 3 includes routines which perform
-   matrix-matrix operations as described in the following table. 
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Description     
-         * -     :ref:`onemkl_blas_gemm`   
-           -     Computes a matrix-matrix product with general matrices.   
-         * -     :ref:`onemkl_blas_hemm`   
-           -     Computes a matrix-matrix product where one input matrix is Hermitian and one is general.   
-         * -     :ref:`onemkl_blas_herk`   
-           -     Performs a Hermitian rank-k update.    
-         * -     :ref:`onemkl_blas_her2k`   
-           -     Performs a Hermitian rank-2k update.    
-         * -     :ref:`onemkl_blas_symm`   
-           -     Computes a matrix-matrix product where one input matrix is symmetric and one matrix is general.   
-         * -     :ref:`onemkl_blas_syrk`   
-           -     Performs a symmetric rank-k update.    
-         * -     :ref:`onemkl_blas_syr2k`   
-           -     Performs a symmetric rank-2k update.    
-         * -     :ref:`onemkl_blas_trmm`   
-           -     Computes a matrix-matrix product where one input matrix is triangular and one input matrix is general.   
-         * -     :ref:`onemkl_blas_trsm`   
-           -     Solves a triangular matrix equation (forward or backward solve).   
-
-
-
-.. toctree::
-    :hidden:
-
-    gemm
-    hemm
-    herk
-    her2k
-    symm
-    syrk
-    syr2k
-    trmm
-    trsm
-
-**Parent topic:** :ref:`onemkl_blas`
diff --git a/docs/domains/blas/blas-like-extensions.rst b/docs/domains/blas/blas-like-extensions.rst
deleted file mode 100644
index f447e2f9e..000000000
--- a/docs/domains/blas/blas-like-extensions.rst
+++ /dev/null
@@ -1,55 +0,0 @@
-.. _blas-like-extensions:
-
-BLAS-like Extensions
-====================
-
-
-.. container::
-
-
-   oneAPI Math Kernel Library DPC++ provides additional routines to
-   extend the functionality of the BLAS routines. These include routines
-   to compute many independent vector-vector and matrix-matrix operations.
-
-   The following table lists the BLAS-like extensions with their descriptions.
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Description     
-         * -     :ref:`onemkl_blas_axpy_batch`   
-           -     Computes groups of vector-scalar products added to a vector.
-         * -     :ref:`onemkl_blas_gemm_batch`   
-           -     Computes groups of matrix-matrix products with general matrices.   
-         * -     :ref:`onemkl_blas_trsm_batch`   
-           -     Solves a triangular matrix equation for a group of matrices.   
-         * -     :ref:`onemkl_blas_gemmt`   
-           -     Computes a matrix-matrix product with general matrices, but updates
-                 only the upper or lower triangular part of the result matrix.
-         * -     :ref:`onemkl_blas_gemm_bias`   
-           -     Computes a matrix-matrix product using general integer matrices with bias
- 
-
-
-
-
-.. toctree::
-    :hidden:
-
-    axpy_batch
-    axpby
-    copy_batch
-    dgmm_batch
-    gemm_batch
-    gemv_batch
-    syrk_batch
-    trsm_batch
-    gemmt
-    gemm_bias
-
-**Parent topic:** :ref:`onemkl_blas`
diff --git a/docs/domains/blas/blas.rst b/docs/domains/blas/blas.rst
deleted file mode 100644
index 50411efb8..000000000
--- a/docs/domains/blas/blas.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-.. _onemkl_blas:
-
-BLAS Routines
-+++++++++++++
-
-oneMKL provides DPC++ interfaces to the Basic Linear Algebra Subprograms (BLAS) routines (Level1, Level2, Level3), as well as several BLAS-like extension routines.
-
-.. toctree::
-    :maxdepth: 1
-
-    blas-level-1-routines.rst
-    blas-level-2-routines.rst
-    blas-level-3-routines.rst
-    blas-like-extensions.rst
-
-
-**Parent topic:** :ref:`onemkl_dense_linear_algebra`
diff --git a/docs/domains/blas/copy.rst b/docs/domains/blas/copy.rst
deleted file mode 100644
index e09db618e..000000000
--- a/docs/domains/blas/copy.rst
+++ /dev/null
@@ -1,159 +0,0 @@
-.. _onemkl_blas_copy:
-
-copy
-====
-
-Copies a vector to another vector.
-
-.. _onemkl_blas_copy_description:
-
-.. rubric:: Description
-
-The ``copy`` routines copy one vector to another:
-
-.. math::
-      
-      y \leftarrow  x
-
-where ``x`` and ``y`` are vectors of n elements.
-
-``copy`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-
-.. _onemkl_blas_copy_buffer:
-
-copy (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void copy(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void copy(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_copy_usm:
-
-copy (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event copy(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event copy(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-   
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to the input vector ``x``. The array holding the vector
-      ``x`` must be of size at least (1 + (``n`` – 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/copy_batch.rst b/docs/domains/blas/copy_batch.rst
deleted file mode 100644
index b02d71c56..000000000
--- a/docs/domains/blas/copy_batch.rst
+++ /dev/null
@@ -1,328 +0,0 @@
-.. _onemkl_blas_copy_batch:
-
-copy_batch
-==========
-
-Computes a group of ``copy`` operations.
-
-.. _onemkl_blas_copy_batch_description:
-
-.. rubric:: Description
-
-The ``copy_batch`` routines are batched versions of :ref:`onemkl_blas_copy`, performing
-multiple ``copy`` operations in a single call. Each ``copy`` 
-operation copies one vector to another.
-   
-``copy_batch`` supports the following precisions for data.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_copy_batch_buffer:
-
-copy_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``copy_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-  
-   for i = 0 … batch_size – 1
-      X and Y are vectors at offset i * stridex, i * stridey in x and y
-      Y := X
-   end for
-
-where:
-
-``X`` and ``Y`` are vectors.
-   
-**Strided API**
-
-.. rubric:: Syntax
- 
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void copy_batch(sycl::queue &queue,
-                       std::int64_t n,
-                       sycl::buffer<T,
-                       1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,
-                       1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void copy_batch(sycl::queue &queue,
-                       std::int64_t n,
-                       sycl::buffer<T,
-                       1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,
-                       1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in ``X`` and ``Y``.
-
-   x
-      Buffer holding input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx 
-      Stride of vector ``X``.
-
-   stridex 
-      Stride between different ``X`` vectors.
-
-   y
-      Buffer holding input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy 
-      Stride of vector ``Y``.
-   
-   stridey 
-      Stride between different ``Y`` vectors.
-
-   batch_size 
-      Specifies the number of ``copy`` operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output buffer, overwritten by ``batch_size`` ``copy`` operations.
-
-
-.. _onemkl_blas_copy_batch_usm:
-
-copy_batch (USM Version)
-------------------------
-
-.. rubric:: Description
-
-The USM version of ``copy_batch`` supports the group API and strided API. 
-
-The group API operation is defined as
-::
-   
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           X and Y are vectors in x[idx] and y[idx]
-           Y := X
-           idx := idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-   
-   for i = 0 … batch_size – 1
-      X and Y are vectors at offset i * stridex, i * stridey in x and y
-      Y := X
-   end for
-
-where:
-
-``X`` and ``Y`` are vectors.
-
-For group API, ``x`` and ``y`` arrays contain the pointers for all the input vectors. 
-The total number of vectors in ``x`` and ``y`` is given by:
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
-
-For strided API, ``x`` and ``y`` arrays contain all the input vectors. 
-The total number of vectors in ``x`` and ``y`` is given by the ``batch_size`` parameter.
-
-**Group API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event copy_batch(sycl::queue &queue,
-                              std::int64_t *n,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event copy_batch(sycl::queue &queue,
-                              std::int64_t *n,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the number of elements in vectors ``X`` and ``Y`` for every vector in group ``i``.
-
-   x
-      Array of pointers to input vectors ``X`` with size ``total_batch_count``.
-      The size of array allocated for the ``X`` vector of the group ``i`` must be at least (1 + (``n[i]`` – 1)*abs(``incx[i]``)). 
-      See :ref:`matrix-storage` for more details.
-
-   incx
-      Array of ``group_count`` integers. ``incx[i]`` specifies the stride of vector ``X`` in group ``i``.
- 
-   y
-      Array of pointers to input/output vectors ``Y`` with size ``total_batch_count``.
-      The size of array allocated for the ``Y`` vector of the group ``i`` must be at least (1 + (``n[i]`` – 1)*abs(``incy[i]``)). 
-      See :ref:`matrix-storage` for more details.
-
-   incy
-      Array of ``group_count`` integers. ``incy[i]`` specifies the stride of vector ``Y`` in group ``i``.
-
-   group_count
-      Number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the number of ``copy`` operations in group ``i``. 
-      Each element in ``group_size`` must be at least 0.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Array of pointers holding the ``Y`` vectors, overwritten by ``total_batch_count`` ``copy`` operations.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event copy_batch(sycl::queue &queue,
-                              std::int64_t n,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event copy_batch(sycl::queue &queue,
-                              std::int64_t n,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in ``X`` and ``Y``.
-
-   x
-      Pointer to input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx 
-      Stride of vector ``X``.
-   
-   stridex 
-      Stride between different ``X`` vectors.
-
-   y
-      Pointer to input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy 
-      Stride of vector ``Y``.
-   
-   stridey 
-      Stride between different ``Y`` vectors.
-
-   batch_size 
-      Specifies the number of ``copy`` operations to perform.
-  
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output vectors, overwritten by ``batch_size`` ``copy`` operations.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/dgmm_batch.rst b/docs/domains/blas/dgmm_batch.rst
deleted file mode 100644
index 25eaace9f..000000000
--- a/docs/domains/blas/dgmm_batch.rst
+++ /dev/null
@@ -1,462 +0,0 @@
-.. _onemkl_blas_dgmm_batch:
-
-dgmm_batch
-==========
-
-Computes a group of ``dgmm`` operations.
-
-.. _onemkl_blas_dgmm_batch_description:
-
-.. rubric:: Description
-
-The ``dgmm_batch`` routines perform
-multiple diagonal matrix-matrix product operations in a single call.
-   
-``dgmm_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_dgmm_batch_buffer:
-
-dgmm_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``dgmm_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A and C are matrices at offset i * stridea in a, i * stridec in c.
-       X is a vector at offset i * stridex in x
-       C := diag(X) * A or C := A * diag(X)
-   end for
-
-where:
-
-``A`` is a matrix,
-
-``X`` is a diagonal matrix stored as a vector
-
-The ``a`` and ``x`` buffers contain all the input matrices. The stride 
-between matrices is given by the stride parameter. The total number
-of matrices in ``a`` and ``x`` buffers is given by the ``batch_size`` parameter.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void dgmm_batch(sycl::queue &queue,
-                       oneapi::mkl::side left_right,
-                       std::int64_t m,
-                       std::int64_t n,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void dgmm_batch(sycl::queue &queue,
-                       oneapi::mkl::side left_right,
-                       std::int64_t m,
-                       std::int64_t n,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies the position of the diagonal matrix in the product.
-      See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of matrices ``A`` and ``C``. Must be at least zero.
-
-   n
-      Number of columns of matrices ``A`` and ``C``. Must be at least zero.
-
-   a
-
-      Buffer holding the input matrices ``A`` with size ``stridea`` *
-      ``batch_size``.  Must be of size at least ``lda`` * ``j`` +
-      ``stridea`` * (``batch_size`` - 1) where ``j`` is ``n`` if column major
-      layout is used or ``m`` if row major layout is used.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive
-      and at least ``m`` if column major layout is used or at least
-      ``n`` if row major layout is used.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   x
-      Buffer holding the input matrices ``X`` with size ``stridex`` *
-      ``batch_size``.  Must be of size at least 
-      (1 + (``len`` - 1)*abs(``incx``)) + ``stridex`` * (``batch_size`` - 1) 
-      where ``len`` is ``n`` if the diagonal matrix is on the right 
-      of the product or ``m`` otherwise.
-
-   incx
-      Stride between two consecutive elements of the ``x`` vectors.
-
-   stridex
-      Stride between different ``X`` vectors, must be at least 0.
-
-   c
-      Buffer holding input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   stridec
-      Stride between different ``C`` matrices. Must be at least
-      ``ldc`` * ``n`` if column major layout is used or ``ldc`` * ``m`` if row
-      major layout is used.
-
-   batch_size
-      Specifies the number of diagonal matrix-matrix product operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output overwritten by ``batch_size`` diagonal matrix-matrix product
-      operations.
-
-
-.. _onemkl_blas_dgmm_batch_usm:
-
-dgmm_batch (USM Version)
----------------------------
-
-.. rubric:: Description
-
-The USM version of ``dgmm_batch`` supports the group API and strided API. 
-
-The group API operation is defined as:
-::
-
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           a and c are matrices of size mxn at position idx in a_array and c_array
-           x is a vector of size m or n depending on left_right, at position idx in x_array
-           if (left_right == oneapi::mkl::side::left)
-               c := diag(x) * a
-           else
-               c := a * diag(x)
-           idx := idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-
-   for i = 0 … batch_size – 1
-       A and C are matrices at offset i * stridea in a, i * stridec in c.
-       X is a vector at offset i * stridex in x
-       C := diag(X) * A or C := A * diag(X)
-   end for
-
-where:
-
-``A`` is a matrix,
-
-``X`` is a diagonal matrix stored as a vector
-
-The ``a`` and ``x`` buffers contain all the input matrices. The stride 
-between matrices is given by the stride parameter. The total number
-of matrices in ``a`` and ``x`` buffers is given by the ``batch_size`` parameter.
- 
-For group API, ``a`` and ``x`` arrays contain the pointers for all the input matrices. 
-The total number of matrices in ``a`` and ``x`` is given by:
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
- 
-For strided API, ``a`` and ``x`` arrays contain all the input matrices. The total number of matrices 
-in ``a`` and ``x`` is given by the ``batch_size`` parameter.
-   
-**Group API**
-
-.. rubric:: Syntax
-   
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event dgmm_batch(sycl::queue &queue,
-                              oneapi::mkl::side *left_right,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event dgmm_batch(sycl::queue &queue,
-                              oneapi::mkl::side *left_right,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **x,
-                              std::int64_t *incx,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies the position of the diagonal matrix in the product.
-      See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Array of ``group_count`` integers. ``m[i]`` specifies the
-      number of rows of ``A`` for every matrix in group ``i``. All entries must be at least zero.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the
-      number of columns of ``A`` for every matrix in group ``i``. All entries must be at least zero.
-
-   a
-      Array of pointers to input matrices ``A`` with size
-      ``total_batch_count``.  Must be of size at least ``lda[i]`` * ``n[i]`` if
-      column major layout is used or at least ``lda[i]`` * ``m[i]`` if row major
-      layout is used.
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      Array of ``group_count`` integers. ``lda[i]`` specifies the
-      leading dimension of ``A`` for every matrix in group ``i``. All
-      entries must be positive and at least ``m[i]`` if column major
-      layout is used or at least ``n[i]`` if row major layout is used.
-
-   x
-      Array of pointers to input vectors ``X`` with size
-      ``total_batch_count``.  Must be of size at least (1 + (``len[i]`` –
-      1)*abs(``incx[i]``)) where ``len[i]`` is ``n[i]`` if the diagonal matrix is on the
-      right of the product or ``m[i]`` otherwise.
-      See :ref:`matrix-storage` for more details.
-
-   incx
-      Array of ``group_count`` integers. ``incx[i]`` specifies the
-      stride of ``x`` for every vector in group ``i``. All entries
-      must be positive.
-   c
-      Array of pointers to input/output matrices ``C`` with size ``total_batch_count``. 
-      Must be of size at least
-      ``ldc[i]`` * ``n[i]``
-      if column major layout is used or at least
-      ``ldc[i]`` * ``m[i]``
-      if row major layout is used.
-      See :ref:`matrix-storage` for more details.
-
-   ldc
-      Array of ``group_count`` integers. ``ldc[i]`` specifies the
-      leading dimension of ``C`` for every matrix in group ``i``.  All
-      entries must be positive and ``ldc[i]`` must be at least
-      ``m[i]`` if column major layout is used to store matrices or at
-      least ``n[i]`` if row major layout is used to store matrices.
-
-   group_count
-      Specifies the number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the
-      number of diagonal matrix-matrix product operations in group ``i``.
-      All entries must be at least 0.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output overwritten by ``batch_size`` diagonal matrix-matrix product
-      operations.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event dgmm_batch(sycl::queue &queue,
-                              onemkl::mkl::side left_right,
-                              std::int64_t m,
-                              std::int64_t n,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *b,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stridec,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event dgmm_batch(sycl::queue &queue,
-                              onemkl::mkl::side left_right,
-                              std::int64_t m,
-                              std::int64_t n,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *b,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stridec,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies the position of the diagonal matrix in the product.
-      See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   a
-      Pointer to input matrices ``A`` with size ``stridea`` *
-      ``batch_size``.  Must be of size at least
-      ``lda`` * ``k`` + ``stridea`` * (``batch_size`` - 1) 
-      where ``k`` is ``n`` if column major layout is used 
-      or ``m`` if row major layout is used.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive
-      and at least ``m``.  Must be positive and at least ``m`` if column
-      major layout is used or at least ``n`` if row major layout is used.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   x
-      Pointer to input matrices ``X`` with size ``stridex`` * ``batch_size``.
-      Must be of size at least
-      (1 + (``len`` - 1)*abs(``incx``)) + ``stridex`` * (``batch_size`` - 1)
-      where ``len`` is ``n`` if the diagonal matrix is on the right
-      of the product or ``m`` otherwise.
-
-   incx
-      Stride between two consecutive elements of the ``x`` vector.
-
-   stridex
-      Stride between different ``X`` vectors, must be at least 0.
-
-   c
-      Pointer to input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive and at least
-      ``ldc`` * ``m`` if column major layout is used to store matrices or at
-      least ``ldc`` * ``n`` if column major layout is used to store matrices.
-
-   stridec
-      Stride between different ``C`` matrices. Must be at least
-      ``ldc`` * ``n`` if column major layout is used or 
-      ``ldc`` * ``m`` if row major layout is used.
-
-   batch_size
-      Specifies the number of diagonal matrix-matrix product operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output overwritten by ``batch_size`` diagonal matrix-matrix product
-      operations.
-
-.. container:: section
-      
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/dot.rst b/docs/domains/blas/dot.rst
deleted file mode 100644
index 8ae352889..000000000
--- a/docs/domains/blas/dot.rst
+++ /dev/null
@@ -1,182 +0,0 @@
-.. _onemkl_blas_dot:
-
-dot
-===
-
-Computes the dot product of two real vectors.
-
-.. _onemkl_blas_dot_description:
-
-.. rubric:: Description
-
-The ``dot`` routines perform a dot product between two vectors:
-
-.. math::
-
-   result = \sum_{i=1}^{n}X_iY_i 
-
-``dot`` supports the following precisions for data.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_res 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``float`` 
-        -  ``double`` 
-
-.. container:: Note
-
-   .. rubric:: Note
-      :class: NoteTipHead
-
-   For the mixed precision version (inputs are float while result is
-   double), the dot product is computed with double precision.
-
-.. _onemkl_blas_dot_buffer:
-
-dot (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void dot(sycl::queue &queue,
-                std::int64_t n,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                sycl::buffer<T_res,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void dot(sycl::queue &queue,
-                std::int64_t n,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                sycl::buffer<T_res,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at least
-      (1 + (``n`` – 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the result (a scalar) will be stored.
-
-
-.. _onemkl_blas_dot_usm:
-
-dot (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event dot(sycl::queue &queue,
-                       std::int64_t n,
-                       const T *x,
-                       std::int64_t incx,
-                       const T *y,
-                       std::int64_t incy,
-                       T_res *result,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event dot(sycl::queue &queue,
-                       std::int64_t n,
-                       const T *x,
-                       std::int64_t incx,
-                       const T *y,
-                       std::int64_t incy,
-                       T_res *result,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   x
-      Pointer to the input vector ``x``. The array holding the vector ``x``
-      must be of size at least (1 + (``n`` – 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array holding the vector ``y``
-      must be of size at least (1 + (``n`` – 1)*abs(``incy``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to where the result (a scalar) will be stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/dotc.rst b/docs/domains/blas/dotc.rst
deleted file mode 100644
index d6746f124..000000000
--- a/docs/domains/blas/dotc.rst
+++ /dev/null
@@ -1,170 +0,0 @@
-.. _onemkl_blas_dotc:
-
-dotc
-====
-
-Computes the dot product of two complex vectors, conjugating the first vector.
-
-.. _onemkl_blas_dotc_description:
-
-.. rubric:: Description
-
-The ``dotc`` routines perform a dot product between two complex
-vectors, conjugating the first of them:
-
-.. math::
-
-   result = \sum_{i=1}^{n}\overline{X_i}Y_i 
-
-``dotc`` supports the following precisions for data.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_dotc_buffer:
-
-dotc (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void dotc(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void dotc(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      The number of elements in vectors ``x`` and ``y``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      The stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details..
-
-   incy
-      The stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      The buffer where the result (a scalar) is stored.
-
-
-.. _onemkl_blas_dotc_usm:
-
-dotc (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void dotc(sycl::queue &queue,
-                 std::int64_t n,
-                 const T *x,
-                 std::int64_t incx,
-                 const T *y,
-                 std::int64_t incy,
-                 T *result,
-                 const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void dotc(sycl::queue &queue,
-                 std::int64_t n,
-                 const T *x,
-                 std::int64_t incx,
-                 const T *y,
-                 std::int64_t incy,
-                 T *result,
-                 const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      The number of elements in vectors ``x`` and ``y``.
-
-   x
-      Pointer to input vector ``x``. The array holding the input
-      vector ``x`` must be of size at least (1 + (``n`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      The stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding the input
-      vector ``y`` must be of size at least (1 + (``n`` -
-      1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details..
-
-   incy
-      The stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      The pointer to where the result (a scalar) is stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/dotu.rst b/docs/domains/blas/dotu.rst
deleted file mode 100644
index d936815ae..000000000
--- a/docs/domains/blas/dotu.rst
+++ /dev/null
@@ -1,170 +0,0 @@
-.. _onemkl_blas_dotu:
-
-dotu
-====
-
-Computes the dot product of two complex vectors.
-
-.. _onemkl_blas_dotu_description:
-
-.. rubric:: Description
-
-The ``dotu`` routines perform a dot product between two complex vectors:
-
-.. math::
-
-   result = \sum_{i=1}^{n}X_iY_i 
-
-``dotu`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_dotu_buffer:
-
-dotu (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void dotu(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void dotu(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the result (a scalar) is stored.
-
-
-.. _onemkl_blas_dotu_usm:
-
-dotu (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event dotu(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event dotu(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   x
-      Pointer to the input vector ``x``. The array holding input
-      vector ``x`` must be of size at least (1 + (``n`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to where the result (a scalar) is stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/gbmv.rst b/docs/domains/blas/gbmv.rst
deleted file mode 100644
index 366ce69f2..000000000
--- a/docs/domains/blas/gbmv.rst
+++ /dev/null
@@ -1,285 +0,0 @@
-.. _onemkl_blas_gbmv:
-
-gbmv
-====
-
-Computes a matrix-vector product with a general band matrix.
-
-.. _onemkl_blas_gbmv_description:
-
-.. rubric:: Description
-
-The ``gbmv`` routines compute a scalar-matrix-vector product and add
-the result to a scalar-vector product, with a general band matrix.
-The operation is defined as
-
-.. math::
-      
-      y \leftarrow alpha*op(A)*x + beta*y
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``m``-by-``n`` matrix with ``kl`` sub-diagonals and
-``ku`` super-diagonals,
-
-``x`` and ``y`` are vectors.
-
-``gbmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gbmv_buffer:
-
-gbmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gbmv(sycl::queue &queue,
-                 onemkl::transpose trans,
-                 std::int64_t m,
-                 std::int64_t n,
-                 std::int64_t kl,
-                 std::int64_t ku,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gbmv(sycl::queue &queue,
-                 onemkl::transpose trans,
-                 std::int64_t m,
-                 std::int64_t n,
-                 std::int64_t kl,
-                 std::int64_t ku,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``.
-      See
-      :ref:`onemkl_datatypes` for more
-      details.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   kl
-      Number of sub-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   ku
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least ``lda``\ \*\ ``n``
-      if column major layout is used or at least ``lda``\ \*\ ``m``
-      if row major layout is used. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``kl`` +
-      ``ku`` + 1), and positive.
-
-   x
-      Buffer holding input vector ``x``. The length ``len`` of vector
-      ``x`` is ``n`` if ``A`` is not transposed, and ``m`` if ``A`` is
-      transposed. The buffer must be of size at least (1 + (``len`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The length ``len`` of
-      vector ``y`` is ``m``, if ``A`` is not transposed, and ``n`` if
-      ``A`` is transposed. The buffer must be of size at least (1 +
-      (``len`` - 1)*abs(``incy``)) where ``len`` is this length. See
-      :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_gbmv_usm:
-
-gbmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gbmv(sycl::queue &queue,
-                        onemkl::transpose trans,
-                        std::int64_t m,
-                        std::int64_t n,
-                        std::int64_t kl,
-                        std::int64_t ku,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gbmv(sycl::queue &queue,
-                        onemkl::transpose trans,
-                        std::int64_t m,
-                        std::int64_t n,
-                        std::int64_t kl,
-                        std::int64_t ku,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See
-      :ref:`onemkl_datatypes` for
-      more details.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   kl
-      Number of sub-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   ku
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n`` if column
-      major layout is used or at least ``lda``\ \*\ ``m`` if row
-      major layout is used. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``kl`` +
-      ``ku`` + 1), and positive.
-
-   x
-      Pointer to input vector ``x``. The length ``len`` of vector
-      ``x`` is ``n`` if ``A`` is not transposed, and ``m`` if ``A``
-      is transposed. The array holding input vector ``x`` must be of
-      size at least (1 + (``len`` - 1)*abs(``incx``)). See 
-      :ref:`matrix-storage` for more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The length ``len`` of
-      vector ``y`` is ``m``, if ``A`` is not transposed, and ``n`` if
-      ``A`` is transposed. The array holding input/output vector
-      ``y`` must be of size at least (1 + (``len`` -
-      1)*abs(``incy``)) where ``len`` is this length. 
-      See :ref:`matrix-storage` for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/gemm.rst b/docs/domains/blas/gemm.rst
deleted file mode 100644
index e9e2c89ea..000000000
--- a/docs/domains/blas/gemm.rst
+++ /dev/null
@@ -1,455 +0,0 @@
-.. _onemkl_blas_gemm:
-
-gemm
-====
-
-Computes a matrix-matrix product with general matrices.
-
-.. _onemkl_blas_gemm_description:
-
-.. rubric:: Description
-
-The ``gemm`` routines compute a scalar-matrix-matrix product and add the
-result to a scalar-matrix product, with general matrices. The
-operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*op(A)*op(B) + beta*C
-
-where:
-
-op(``X``) is one of op(``X``) = ``X``, or op(``X``) = ``X``\ :sup:`T`, or
-op(``X``) = ``X``\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A``, ``B`` and ``C`` are matrices,
-
-``op(A)`` is an ``m``-by-``k`` matrix,
-
-``op(B)`` is a ``k``-by-``n`` matrix,
-
-``C`` is an ``m``-by-``n`` matrix.
-
-``gemm`` supports the following precisions.
-
-   .. list-table:: 
-     :header-rows: 1
-
-     * -  Ts 
-       -  Ta 
-       -  Tb 
-       -  Tc 
-     * -  ``float`` 
-       -  ``half`` 
-       -  ``half`` 
-       -  ``float`` 
-     * -  ``half`` 
-       -  ``half`` 
-       -  ``half`` 
-       -  ``half`` 
-     * -  ``float``
-       -  ``bfloat16``
-       -  ``bfloat16``
-       -  ``float``
-     * -  ``float`` 
-       -  ``float`` 
-       -  ``float`` 
-       -  ``float`` 
-     * -  ``double`` 
-       -  ``double`` 
-       -  ``double`` 
-       -  ``double`` 
-     * -  ``std::complex<float>`` 
-       -  ``std::complex<float>`` 
-       -  ``std::complex<float>`` 
-       -  ``std::complex<float>`` 
-     * -  ``std::complex<double>`` 
-       -  ``std::complex<double>`` 
-       -  ``std::complex<double>`` 
-       -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gemm_buffer:
-
-gemm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemm(sycl::queue &queue,
-                 onemkl::transpose transa,
-                 onemkl::transpose transb,
-                 std::int64_t m,
-                 std::int64_t n,
-                 std::int64_t k,
-                 Ts alpha,
-                 sycl::buffer<Ta,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<Tb,1> &b,
-                 std::int64_t ldb,
-                 Ts beta,
-                 sycl::buffer<Tc,1> &c,
-                 std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemm(sycl::queue &queue,
-                 onemkl::transpose transa,
-                 onemkl::transpose transb,
-                 std::int64_t m,
-                 std::int64_t n,
-                 std::int64_t k,
-                 Ts alpha,
-                 sycl::buffer<Ta,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<Tb,1> &b,
-                 std::int64_t ldb,
-                 Ts beta,
-                 sycl::buffer<Tc,1> &c,
-                 std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   transa
-      Specifies the form of op(``A``), the transposition operation
-      applied to ``A``.
-
-   transb
-      Specifies the form of op(``B``), the transposition operation
-      applied to ``B``.
-
-   m
-      Specifies the number of rows of the matrix op(``A``) and of the
-      matrix ``C``. The value of m must be at least zero.
-
-   n
-      Specifies the number of columns of the matrix op(``B``) and the
-      number of columns of the matrix ``C``. The value of n must be at
-      least zero.
-
-   k
-      Specifies the number of columns of the matrix op(``A``) and the
-      number of rows of the matrix op(``B``). The value of k must be at
-      least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      The buffer holding the input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``
-         * - Row major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-           - ``A`` is an ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``
-
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
-             
-   b
-      The buffer holding the input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The buffer holding the input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      The buffer, which is overwritten by
-      ``alpha``\ \*\ op(``A``)*op(``B``) + ``beta``\ \*\ ``C``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized before
-   calling ``gemm``.
-
-
-.. _onemkl_blas_gemm_usm:
-
-gemm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemm(sycl::queue &queue,
-                        onemkl::transpose transa,
-                        onemkl::transpose transb,
-                        std::int64_t m,
-                        std::int64_t n,
-                        std::int64_t k,
-                        Ts alpha,
-                        const Ta *a,
-                        std::int64_t lda,
-                        const Tb *b,
-                        std::int64_t ldb,
-                        Ts beta,
-                        Tc *c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemm(sycl::queue &queue,
-                        onemkl::transpose transa,
-                        onemkl::transpose transb,
-                        std::int64_t m,
-                        std::int64_t n,
-                        std::int64_t k,
-                        Ts alpha,
-                        const Ta *a,
-                        std::int64_t lda,
-                        const Tb *b,
-                        std::int64_t ldb,
-                        Ts beta,
-                        Tc *c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   transa
-      Specifies the form of op(``A``), the transposition operation
-      applied to ``A``.
-
-
-   transb
-      Specifies the form of op(``B``), the transposition operation
-      applied to ``B``.
-
-
-   m
-      Specifies the number of rows of the matrix op(``A``) and of the
-      matrix ``C``. The value of m must be at least zero.
-
-
-   n
-      Specifies the number of columns of the matrix op(``B``) and the
-      number of columns of the matrix ``C``. The value of n must be
-      at least zero.
-
-
-   k
-      Specifies the number of columns of the matrix op(``A``) and the
-      number of rows of the matrix op(``B``). The value of k must be
-      at least zero.
-
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-         * - Row major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-             
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
-             
-   b
-      Pointer to input matrix ``B``.
-      
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-   
-      See :ref:`matrix-storage` for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The pointer to input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*\ op(``A``)*op(``B``) + ``beta``\ \*\ ``C``.
- 
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``gemm``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/gemm_batch.rst b/docs/domains/blas/gemm_batch.rst
deleted file mode 100644
index e63129fdd..000000000
--- a/docs/domains/blas/gemm_batch.rst
+++ /dev/null
@@ -1,606 +0,0 @@
-.. _onemkl_blas_gemm_batch:
-
-gemm_batch
-==========
-
-Computes a group of ``gemm`` operations.
-
-.. _onemkl_blas_gemm_batch_description:
-
-.. rubric:: Description
-
-The ``gemm_batch`` routines are batched versions of :ref:`onemkl_blas_gemm`, performing
-multiple ``gemm`` operations in a single call. Each ``gemm`` 
-operation performs a matrix-matrix product with general matrices.
-   
-``gemm_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``half``
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gemm_batch_buffer:
-
-gemm_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``gemm_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A, B and C are matrices at offset i * stridea, i * strideb, i * stridec in a, b and c.
-       C := alpha * op(A) * op(B) + beta * C
-   end for
-
-where:
-
-op(X) is one of op(X) = X, or op(X) = X\ :sup:`T`, or op(X) = X\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A``, ``B``, and ``C`` are matrices,
-
-op(``A``) is ``m`` x ``k``, op(``B``) is 
-``k`` x ``n``, and ``C`` is ``m`` x ``n``.
-
-The ``a``, ``b`` and ``c`` buffers contain all the input matrices. The stride 
-between matrices is given by the stride parameter. The total number
-of matrices in ``a``, ``b`` and ``c`` buffers is given by the ``batch_size`` parameter.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemm_batch(sycl::queue &queue,
-                       onemkl::transpose transa,
-                       onemkl::transpose transb,
-                       std::int64_t m,
-                       std::int64_t n,
-                       std::int64_t k,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &b,
-                       std::int64_t ldb,
-                       std::int64_t strideb,
-                       T beta,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemm_batch(sycl::queue &queue,
-                       onemkl::transpose transa,
-                       onemkl::transpose transb,
-                       std::int64_t m,
-                       std::int64_t n,
-                       std::int64_t k,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &b,
-                       std::int64_t ldb,
-                       std::int64_t strideb,
-                       T beta,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   transb
-      Specifies op(``B``), the transposition operation applied to the
-      matrices ``B``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of op(``A``) and ``C``. Must be at least zero.
-
-
-   n
-      Number of columns of op(``B``) and ``C``. Must be at least zero.
-
-
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be at
-      least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix products.
-
-   a
-      Buffer holding the input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   b
-      Buffer holding the input matrices ``B`` with size ``strideb`` * ``batch_size``.
-
-   ldb
-      The leading dimension of the matrices ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   strideb
-      Stride between different ``B`` matrices.
-
-   beta
-      Scaling factor for the matrices ``C``.
-
-   c
-      Buffer holding input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   stridec
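-      Stride between different ``C`` matrices. Must be at least
-      ``ldc`` * ``n`` if column major layout is used to store matrices
-      or at least ``ldc`` * ``m`` if row major layout is used to store
-      matrices.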
-
-   batch_size
-      Specifies the number of matrix multiply operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by ``batch_size`` matrix multiply
-      operations of the form ``alpha`` * op(``A``)*op(``B``) + ``beta`` * ``C``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized before
-   calling ``gemm_batch``.
-
-
-.. _onemkl_blas_gemm_batch_usm:
-
-gemm_batch (USM Version)
----------------------------
-
-.. rubric:: Description
-
-The USM version of ``gemm_batch`` supports the group API and strided API.
-
-The group API operation is defined as:
-::
-
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           A, B, and C are matrices in a[idx], b[idx] and c[idx]
-           C := alpha[i] * op(A) * op(B) + beta[i] * C
-           idx = idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-
-   for i = 0 … batch_size – 1
-       A, B and C are matrices at offset i * stridea, i * strideb, i * stridec in a, b and c.
-       C := alpha * op(A) * op(B) + beta * C
-   end for
-
-where:
-
-op(X) is one of op(X) = X, or op(X) = X\ :sup:`T`, or op(X) = X\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A``, ``B``, and ``C`` are matrices,
-
-op(``A``) is ``m`` x ``k``, op(``B``) is ``k`` x ``n``, and ``C`` is ``m`` x ``n``.
-
- 
-For group API, ``a``, ``b`` and ``c`` arrays contain the pointers for all the input matrices. 
-The total number of matrices in ``a``, ``b`` and ``c`` is given by:
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
- 
-For strided API, ``a``, ``b``, ``c`` arrays contain all the input matrices. The total number of matrices 
-in ``a``, ``b`` and ``c`` is given by the ``batch_size`` parameter.
-   
-**Group API**
-
-.. rubric:: Syntax
-   
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemm_batch(sycl::queue &queue,
-                              onemkl::transpose *transa,
-                              onemkl::transpose *transb,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              std::int64_t *k,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **b,
-                              std::int64_t *ldb,
-                              T *beta,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemm_batch(sycl::queue &queue,
-                              onemkl::transpose *transa,
-                              onemkl::transpose *transb,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              std::int64_t *k,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **b,
-                              std::int64_t *ldb,
-                              T *beta,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   transa
-      Array of ``group_count`` ``onemkl::transpose`` values. ``transa[i]`` specifies the form of op(``A``) used in
-      the matrix multiplication in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   transb
-      Array of ``group_count`` ``onemkl::transpose`` values. ``transb[i]`` specifies the form of op(``B``) used in
-      the matrix multiplication in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Array of ``group_count`` integers. ``m[i]`` specifies the
-      number of rows of op(``A``) and ``C`` for every matrix in group ``i``. All entries must be at least zero.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the
-      number of columns of op(``B``) and ``C`` for every matrix in group ``i``. All entries must be at least zero.
-
-   k
-      Array of ``group_count`` integers. ``k[i]`` specifies the
-      number of columns of op(``A``) and rows of op(``B``) for every matrix in group ``i``. All entries must be at
-      least zero.
-
-   alpha
-      Array of ``group_count`` scalar elements. ``alpha[i]`` specifies the scaling factor for every matrix-matrix
-      product in group ``i``.
-
-   a
-      Array of pointers to input matrices ``A`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      Array of ``group_count`` integers. ``lda[i]`` specifies the
-      leading dimension of ``A`` for every matrix in group ``i``. All
-      entries must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda[i]`` must be at least ``m[i]``.
-           - ``lda[i]`` must be at least ``k[i]``.
-         * - Row major
-           - ``lda[i]`` must be at least ``k[i]``.
-           - ``lda[i]`` must be at least ``m[i]``.
-             
-   b
-      Array of pointers to input matrices ``B`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   ldb
-      Array of ``group_count`` integers. ``ldb[i]`` specifies the
-      leading dimension of ``B`` for every matrix in group ``i``. All
-      entries must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb[i]`` must be at least ``k[i]``.
-           - ``ldb[i]`` must be at least ``n[i]``.
-         * - Row major
-           - ``ldb[i]`` must be at least ``n[i]``.
-           - ``ldb[i]`` must be at least ``k[i]``.
-             
-   beta
-      Array of ``group_count`` scalar elements. ``beta[i]`` specifies the scaling factor for matrix ``C`` 
-      for every matrix in group ``i``.
-
-   c
-      Array of pointers to input/output matrices ``C`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   ldc
-      Array of ``group_count`` integers. ``ldc[i]`` specifies the
-      leading dimension of ``C`` for every matrix in group ``i``.  All
-      entries must be positive and ``ldc[i]`` must be at least
-      ``m[i]`` if column major layout is used to store matrices or at
-      least ``n[i]`` if row major layout is used to store matrices.
-
-   group_count
-      Specifies the number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the
-      number of matrix multiply products in group ``i``. All entries must be at least 0.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Overwritten by the ``m[i]``-by-``n[i]`` matrix calculated by 
-      (``alpha[i]`` * op(``A``)*op(``B``) + ``beta[i]`` * ``C``) for group ``i``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``gemm_batch``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemm_batch(sycl::queue &queue,
-                              onemkl::transpose transa,
-                              onemkl::transpose transb,
-                              std::int64_t m,
-                              std::int64_t n,
-                              std::int64_t k,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *b,
-                              std::int64_t ldb,
-                              std::int64_t strideb,
-                              T beta,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stridec,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemm_batch(sycl::queue &queue,
-                              onemkl::transpose transa,
-                              onemkl::transpose transb,
-                              std::int64_t m,
-                              std::int64_t n,
-                              std::int64_t k,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *b,
-                              std::int64_t ldb,
-                              std::int64_t strideb,
-                              T beta,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stridec,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   transb
-      Specifies op(``B``), the transposition operation applied to the
-      matrices ``B``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of op(``A``) and ``C``. Must be at least zero.
-
-   n
-      Number of columns of op(``B``) and ``C``. Must be at least zero.
-
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be at
-      least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix products.
-
-   a
-      Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   b
-      Pointer to input matrices ``B`` with size ``strideb`` * ``batch_size``.
-
-   ldb
-      The leading dimension of the matrices ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   strideb
-      Stride between different ``B`` matrices.
-
-   beta
-      Scaling factor for the matrices ``C``.
-
-   c
-      Pointer to input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   stridec
-      Stride between different ``C`` matrices.
-
-   batch_size
-      Specifies the number of matrix multiply operations to perform.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output matrices, overwritten by ``batch_size`` matrix multiply
-      operations of the form ``alpha`` * op(``A``)*op(``B``) + ``beta`` * ``C``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized before
-   calling ``gemm_batch``.
-
-.. container:: section
-      
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/gemm_bias.rst b/docs/domains/blas/gemm_bias.rst
deleted file mode 100644
index dd7ce4ecc..000000000
--- a/docs/domains/blas/gemm_bias.rst
+++ /dev/null
@@ -1,513 +0,0 @@
-.. _onemkl_blas_gemm_bias:
-
-gemm_bias
-=========
-
-Computes a matrix-matrix product using general integer matrices with bias.
-
-.. _onemkl_blas_gemm_bias_description:
-
-.. rubric:: Description
-
-The gemm_bias routines compute a scalar-matrix-matrix product and
-add the result to a scalar-matrix product, using general integer matrices with biases/offsets. 
-The operation is defined as:
-
-.. math::
-      
-      \scriptstyle C \leftarrow alpha*(op(A) - A\_offset)*(op(B) - B\_offset) + beta*C + C\_offset
-
-where:
-
-op(``X``) is one of op(``X``) = ``X``, or op(``X``) = ``X``\ :sup:`T`, or
-op(``X``) = ``X``\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A_offset`` is an ``m``-by-``k`` matrix with every element equal to the value ao,
-
-``B_offset`` is a ``k``-by-``n`` matrix with every element equal to the value bo,
-
-``C_offset`` is an ``m``-by-``n`` matrix defined by the 
-co buffer as described below, 
-
-``A``, ``B``, and ``C`` are matrices,
-
-op(``A``) is ``m`` x ``k``, op(``B``) is ``k`` x ``n``, and
-``C`` is ``m`` x ``n``.
-
-``gemm_bias`` supports the following precisions.
-
-  .. list-table:: 
-     :header-rows: 1
-
-     * -  Ts 
-       -  Ta 
-       -  Tb 
-       -  Tc 
-     * -  ``float`` 
-       -  ``std::uint8_t`` 
-       -  ``std::uint8_t`` 
-       -  ``std::int32_t`` 
-     * -  ``float`` 
-       -  ``std::int8_t`` 
-       -  ``std::uint8_t`` 
-       -  ``std::int32_t`` 
-     * -  ``float`` 
-       -  ``std::uint8_t`` 
-       -  ``std::int8_t`` 
-       -  ``std::int32_t`` 
-     * -  ``float`` 
-       -  ``std::int8_t`` 
-       -  ``std::int8_t`` 
-       -  ``std::int32_t`` 
-
-.. _onemkl_blas_gemm_bias_buffer:
-
-gemm_bias (Buffer Version)
---------------------------
-
-.. rubric:: Syntax
-      
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemm_bias(sycl::queue &queue,
-                      onemkl::transpose transa,
-                      onemkl::transpose transb,
-                      onemkl::offset offset_type,
-                      std::int64_t m,
-                      std::int64_t n,
-                      std::int64_t k,
-                      Ts alpha,
-                      sycl::buffer<Ta,1> &a,
-                      std::int64_t lda,
-                      Ta ao,
-                      sycl::buffer<Tb,1> &b,
-                      std::int64_t ldb,
-                      Tb bo,
-                      Ts beta,
-                      sycl::buffer<Tc,1> &c,
-                      std::int64_t ldc,
-                      sycl::buffer<Tc,1> &co)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemm_bias(sycl::queue &queue,
-                      onemkl::transpose transa,
-                      onemkl::transpose transb,
-                      onemkl::offset offset_type,
-                      std::int64_t m,
-                      std::int64_t n,
-                      std::int64_t k,
-                      Ts alpha,
-                      sycl::buffer<Ta,1> &a,
-                      std::int64_t lda,
-                      Ta ao,
-                      sycl::buffer<Tb,1> &b,
-                      std::int64_t ldb,
-                      Tb bo,
-                      Ts beta,
-                      sycl::buffer<Tc,1> &c,
-                      std::int64_t ldc,
-                      sycl::buffer<Tc,1> &co)
-   }
-      
-.. container:: section
-   
-   .. rubric:: Input Parameters
- 
-   queue
-      The queue where the routine should be executed.
- 
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   transb
-      Specifies op(``B``), the transposition operation applied to
-      ``B``. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   offset_type
-      Specifies the form of ``C_offset`` used in the matrix
-      multiplication. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   m
-      Number of rows of op(``A``) and ``C``. Must be at least zero.
- 
-   n
-      Number of columns of op(``B``) and ``C``. Must be at least
-      zero.
- 
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be
-      at least zero.
- 
-   alpha
-      Scaling factor for the matrix-matrix product.
- 
-   a
-      The buffer holding the input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-         * - Row major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for more details.
- 
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
- 
-   ao 
-      Specifies the scalar offset value for matrix ``A``.
- 
-   b
-      Buffer holding the input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-   
-      See :ref:`matrix-storage` for more details.
- 
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
- 
-   bo 
-      Specifies the scalar offset value for matrix ``B``.
- 
-   beta
-      Scaling factor for matrix ``C``.
- 
-   c
-      Buffer holding the input/output matrix ``C``.  It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices.
-      See :ref:`matrix-storage` for more details.
- 
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-      
-   co
-      Buffer holding the offset values for matrix ``C``.
- 
-      If ``offset_type`` = ``offset::fix``, the ``co`` array must have
-      size at least 1.
- 
- 
-      If ``offset_type`` = ``offset::col``, the ``co`` array must have
-      size at least ``max(1,m)``.
- 
- 
-      If ``offset_type`` = ``offset::row``, the ``co`` array must have
-      size at least ``max(1,n)``. 
- 
-.. container:: section
- 
-   .. rubric:: Output Parameters
- 
-   c
-      Output buffer, overwritten by ``alpha`` * (op(``A``) -
-      ``A_offset``)*(op(``B``) - ``B_offset``) + ``beta`` * ``C`` + ``C_offset``.
- 
-.. container:: section
- 
-   .. rubric:: Notes
- 
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``gemm_bias``.
-
-
-.. _onemkl_blas_gemm_bias_usm:
-
-gemm_bias (USM Version)
------------------------
-
-.. rubric:: Syntax
-      
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemm_bias(sycl::queue &queue,
-                             onemkl::transpose transa,
-                             onemkl::transpose transb,
-                             onemkl::offset offset_type,
-                             std::int64_t m,
-                             std::int64_t n,
-                             std::int64_t k,
-                             Ts alpha,
-                             const Ta *a,
-                             std::int64_t lda,
-                             Ta ao,
-                             const Tb *b,
-                             std::int64_t ldb,
-                             Tb bo,
-                             Ts beta,
-                             Tc *c,
-                             std::int64_t ldc,
-                             const Tc *co,
-                             const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemm_bias(sycl::queue &queue,
-                             onemkl::transpose transa,
-                             onemkl::transpose transb,
-                             onemkl::offset offset_type,
-                             std::int64_t m,
-                             std::int64_t n,
-                             std::int64_t k,
-                             Ts alpha,
-                             const Ta *a,
-                             std::int64_t lda,
-                             Ta ao,
-                             const Tb *b,
-                             std::int64_t ldb,
-                             Tb bo,
-                             Ts beta,
-                             Tc *c,
-                             std::int64_t ldc,
-                             const Tc *co,
-                             const std::vector<sycl::event> &dependencies = {})
-   }
-      
-.. container:: section
-   
-   .. rubric:: Input Parameters
- 
-   queue
-      The queue where the routine should be executed.
- 
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   transb
-      Specifies op(``B``), the transposition operation applied to
-      ``B``. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   offset_type
-      Specifies the form of ``C_offset`` used in the matrix
-      multiplication. See
-      :ref:`onemkl_datatypes` for
-      more details.
- 
-   m
-      Number of rows of op(``A``) and ``C``. Must be at least zero.
- 
-   n
-      Number of columns of op(``B``) and ``C``. Must be at least
-      zero.
- 
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be
-      at least zero.
- 
-   alpha
-      Scaling factor for the matrix-matrix product.
- 
-   a
-      Pointer to input matrix ``A``.
- 
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-         * - Row major
-           - ``A`` is an ``m``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``m``.
-           - ``A`` is a ``k``-by-``m`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
- 
-      See :ref:`matrix-storage` for more details.
- 
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``m``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``m``.
- 
-   ao
-      Specifies the scalar offset value for matrix ``A``.
- 
-   b
-      Pointer to input matrix ``B``.
- 
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
- 
-      See :ref:`matrix-storage` for more details.
- 
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
- 
-   bo 
-      Specifies the scalar offset value for matrix ``B``.
- 
-   beta
-      Scaling factor for matrix ``C``.
- 
-   c
-      Pointer to input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
- 
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   co
-      Pointer to offset values for matrix ``C``.
- 
- 
-      If ``offset_type`` = ``offset::fix``, the ``co`` array must have
-      size at least 1.
- 
- 
-      If ``offset_type`` = ``offset::col``, the ``co`` array must have
-      size at least ``max(1,m)``.
- 
- 
-      If ``offset_type`` = ``offset::row``, the ``co`` array must have
-      size at least ``max(1,n)``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
- 
-.. container:: section
- 
-   .. rubric:: Output Parameters
- 
-   c
-      Pointer to the output matrix, overwritten by ``alpha`` * (op(``A``) -
-      ``A_offset``)*(op(``B``) - ``B_offset``) + ``beta`` * ``C`` + ``C_offset``.
- 
-.. container:: section
- 
-   .. rubric:: Notes
- 
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``gemm_bias``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/gemmt.rst b/docs/domains/blas/gemmt.rst
deleted file mode 100644
index 7f224b2f7..000000000
--- a/docs/domains/blas/gemmt.rst
+++ /dev/null
@@ -1,418 +0,0 @@
-.. _onemkl_blas_gemmt:
-
-gemmt
-=====
-
-Computes a matrix-matrix product with general matrices, but updates
-only the upper or lower triangular part of the result matrix.
-
-.. _onemkl_blas_gemmt_description:
-
-.. rubric:: Description
-
-The gemmt routines compute a scalar-matrix-matrix product and add
-the result to the upper or lower part of a scalar-matrix product,
-with general matrices. The operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*op(A)*op(B) + beta*C 
-
-where:
-
-op(``X``) is one of op(``X``) = ``X``, or op(``X``) = ``X``\ :sup:`T`, or
-op(``X``) = ``X``\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars
-
-``A``, ``B``, and ``C`` are matrices
-
-op(``A``) is ``n`` x ``k``, op(``B``) is ``k`` x ``n``, and
-``C`` is ``n`` x ``n``.
-
-``gemmt`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gemmt_buffer:
-
-gemmt (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemmt(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose transa,
-                  onemkl::transpose transb,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemmt(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose transa,
-                  onemkl::transpose transb,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``\ ’s data is stored in its upper or
-      lower triangle. See :ref:`onemkl_datatypes` for more details.
-   
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   transb
-      Specifies op(``B``), the transposition operation applied to
-      ``B``. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows of op(``A``), columns of op(``B``), and
-      columns and rows of ``C``. Must be at least zero.
-
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be
-      at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Buffer holding the input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is a ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is a ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   b
-      Buffer holding the input matrix ``B``.
-      
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-   
-      See :ref:`matrix-storage` for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Buffer holding the input/output matrix ``C``. Must have size at
-      least ``ldc`` \* ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by the upper or lower triangular
-      part of ``alpha`` * op(``A``)*op(``B``) + ``beta`` * ``C``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling gemmt.
-
-
-.. _onemkl_blas_gemmt_usm:
-
-gemmt (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemmt(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose transa,
-                         onemkl::transpose transb,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemmt(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose transa,
-                         onemkl::transpose transb,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``\ ’s data is stored in its upper or
-      lower triangle. See
-      :ref:`onemkl_datatypes` for
-      more details.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See
-      :ref:`onemkl_datatypes` for
-      more details.
-
-   transb
-      Specifies op(``B``), the transposition operation applied to
-      ``B``. See
-      :ref:`onemkl_datatypes` for
-      more details.
-
-   n
-      Number of rows of op(``A``), columns of op(``B``), and
-      columns and rows of ``C``. Must be at least zero.
-
-   k
-      Number of columns of op(``A``) and rows of op(``B``). Must be
-      at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is a ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is a ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   b
-      Pointer to input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-         * - Row major
-           - ``B`` is a ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-
-      See :ref:`matrix-storage` for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``B`` not transposed
-           - ``B`` transposed
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-      
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Pointer to input/output matrix ``C``. Must have size at least
-      ``ldc`` \* ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by the upper or lower
-      triangular part of ``alpha`` * op(``A``)*op(``B``) + ``beta`` * ``C``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling gemmt.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/gemv.rst b/docs/domains/blas/gemv.rst
deleted file mode 100644
index 9577fc111..000000000
--- a/docs/domains/blas/gemv.rst
+++ /dev/null
@@ -1,261 +0,0 @@
-.. _onemkl_blas_gemv:
-
-gemv
-====
-
-Computes a matrix-vector product using a general matrix.
-
-.. _onemkl_blas_gemv_description:
-
-.. rubric:: Description
-
-The ``gemv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a general matrix. The
-operation is defined as:
-
-.. math::
-      
-      y \leftarrow alpha*op(A)*x + beta*y
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``m``-by-``n`` matrix, and ``x``, ``y`` are vectors.
-
-``gemv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gemv_buffer:
-
-gemv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemv(sycl::queue &queue,
-                 onemkl::transpose trans,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemv(sycl::queue &queue,
-                 onemkl::transpose trans,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies ``op(A)``, the transposition operation applied to ``A``.
-
-   m
-      Specifies the number of rows of the matrix ``A``. The value of
-      ``m`` must be at least zero.
-
-   n
-      Specifies the number of columns of the matrix ``A``. The value of
-      ``n`` must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      The buffer holding the input matrix ``A``. Must have a size of at
-      least ``lda``\ \*\ ``n`` if column major layout is used or at
-      least ``lda``\ \*\ ``m`` if row major layout is used. See
-      :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-   x
-      Buffer holding input vector ``x``. The length ``len`` of vector
-      ``x`` is ``n`` if ``A`` is not transposed, and ``m`` if ``A`` is
-      transposed. The buffer must be of size at least (1 + (``len`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for more details.
-
-   incx
-      The stride of vector ``x``.
-
-   beta
-      The scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The length ``len`` of
-      vector ``y`` is ``m``, if ``A`` is not transposed, and ``n`` if
-      ``A`` is transposed. The buffer must be of size at least (1 +
-      (``len`` - 1)*abs(``incy``)) where ``len`` is this length. See
-      :ref:`matrix-storage` for more details.
-
-   incy
-      The stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      The buffer holding updated vector ``y``.
-
-
-.. _onemkl_blas_gemv_usm:
-
-gemv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemv(sycl::queue &queue,
-                        onemkl::transpose trans,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemv(sycl::queue &queue,
-                        onemkl::transpose trans,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies ``op(A)``, the transposition operation applied to
-      ``A``. See
-      :ref:`onemkl_datatypes` for
-      more details.
-
-   m
-      Specifies the number of rows of the matrix ``A``. The value of
-      ``m`` must be at least zero.
-
-   n
-      Specifies the number of columns of the matrix ``A``. The value
-      of ``n`` must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to the input matrix ``A``. Must have a size of at
-      least ``lda``\ \*\ ``n`` if column major layout is used or at
-      least ``lda``\ \*\ ``m`` if row major layout is used. See
-      :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-   x
-      Pointer to the input vector ``x``. The length ``len`` of vector
-      ``x`` is ``n`` if ``A`` is not transposed, and ``m`` if ``A``
-      is transposed. The array holding vector ``x`` must be of size
-      at least (1 + (``len`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      The stride of vector ``x``.
-
-   beta
-      The scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The length ``len`` of
-      vector ``y`` is ``m``, if ``A`` is not transposed, and ``n`` if
-      ``A`` is transposed. The array holding input/output vector
-      ``y`` must be of size at least (1 + (``len`` -
-      1)*abs(``incy``)) where ``len`` is this length. See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      The stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      The pointer to updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/gemv_batch.rst b/docs/domains/blas/gemv_batch.rst
deleted file mode 100644
index 0a56b73b6..000000000
--- a/docs/domains/blas/gemv_batch.rst
+++ /dev/null
@@ -1,472 +0,0 @@
-.. _onemkl_blas_gemv_batch:
-
-gemv_batch
-==========
-
-Computes a group of ``gemv`` operations.
-
-.. _onemkl_blas_gemv_batch_description:
-
-.. rubric:: Description
-
-The ``gemv_batch`` routines are batched versions of
-:ref:`onemkl_blas_gemv`, performing multiple ``gemv`` operations in a
-single call. Each ``gemv`` operation performs a scalar-matrix-vector
-product and adds the result to a scalar-vector product.
-   
-``gemv_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gemv_batch_buffer:
-
-gemv_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``gemv_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A is a matrix at offset i * stridea in a.
-       X and Y are vectors at offset i * stridex, i * stridey, in x and y.
-       Y := alpha * op(A) * X + beta * Y
-   end for
-
-where:
-
-op(A) is one of op(A) = A, or op(A) = A\ :sup:`T`, or op(A) = A\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is a matrix and ``X`` and ``Y`` are vectors,
-
-The ``x`` and ``y`` buffers contain all the input vectors. The stride
-between vectors is given by the stride parameter. The total number of
-vectors in ``x`` and ``y`` buffers is given by the ``batch_size``
-parameter.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gemv_batch(sycl::queue &queue,
-                       onemkl::transpose trans,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       T beta,
-                       sycl::buffer<T,1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gemv_batch(sycl::queue &queue,
-                       onemkl::transpose trans,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &x,
-                       std::int64_t incx,
-                       std::int64_t stridex,
-                       T beta,
-                       sycl::buffer<T,1> &y,
-                       std::int64_t incy,
-                       std::int64_t stridey,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies op(``A``) the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of op(``A``). Must be at least zero.
-
-   n
-      Number of columns of op(``A``). Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector products.
-
-   a
-      Buffer holding the input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive
-      and at least ``m`` if column major layout is used or at least
-      ``n`` if row major layout is used.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   x
-      Buffer holding the input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx
-      The stride of the vector ``X``. It must be positive.
-
-   stridex
-      Stride between different consecutive ``X`` vectors, must be at least 0.
-
-   beta
-      Scaling factor for the vector ``Y``.
-
-   y
-      Buffer holding input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy
-      Stride between two consecutive elements of the ``y`` vectors.
-
-   stridey
-      Stride between two consecutive ``Y`` vectors. Must be at least
-      (1 + (len-1)*abs(incy)) where ``len`` is ``m`` if the matrix ``A``
-      is not transposed or ``n`` otherwise.
-
-   batch_size
-      Specifies the number of matrix-vector operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output overwritten by ``batch_size`` matrix-vector product
-      operations of the form ``alpha`` * op(``A``) * ``X`` + ``beta`` * ``Y``.
-
-
-.. _onemkl_blas_gemv_batch_usm:
-
-gemv_batch (USM Version)
----------------------------
-
-.. rubric:: Description
-
-The USM version of ``gemv_batch`` supports the group API and strided API. 
-
-The group API operation is defined as:
-::
-
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           A is an m x n matrix in a[idx]
-           X and Y are vectors in x[idx] and y[idx]
-           Y := alpha[i] * op(A) * X + beta[i] * Y
-           idx = idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-
-   for i = 0 … batch_size – 1
-       A is a matrix at offset i * stridea in a.
-       X and Y are vectors at offset i * stridex, i * stridey in x and y.
-       Y := alpha * op(A) * X + beta * Y
-   end for
-
-where:
-
-op(A) is one of op(A) = A, or op(A) = A\ :sup:`T`, or op(A) = A\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is a matrix and ``X`` and ``Y`` are vectors,
-
-For group API, ``x`` and ``y`` arrays contain the pointers for all the input vectors. 
-``A`` array contains the pointers to all input matrices.
-The total number of vectors in ``x`` and ``y`` and matrices in ``A`` are given by: 
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
- 
-For strided API, ``x`` and ``y`` arrays contain all the input
-vectors. ``A`` array contains all the input matrices. The total
-number of vectors in ``x`` and ``y`` and matrices in ``A`` is given by the
-``batch_size`` parameter.
-   
-**Group API**
-
-.. rubric:: Syntax
-   
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemv_batch(sycl::queue &queue,
-                              onemkl::transpose *trans,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **x,
-                              std::int64_t *incx,
-                              T *beta,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemv_batch(sycl::queue &queue,
-                              onemkl::transpose *trans,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              const T **x,
-                              std::int64_t *incx,
-                              T *beta,
-                              T **y,
-                              std::int64_t *incy,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Array of ``group_count`` ``onemkl::transpose`` values. ``trans[i]`` specifies the form of op(``A``) used in
-      the matrix-vector product in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Array of ``group_count`` integers. ``m[i]`` specifies the
-      number of rows of op(``A``) for every matrix in group ``i``. All entries must be at least zero.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the
-      number of columns of op(``A``) for every matrix in group ``i``. All entries must be at least zero.
-
-   alpha
-      Array of ``group_count`` scalar elements. ``alpha[i]`` specifies
-      the scaling factor for every matrix-vector product in group
-      ``i``.
-
-   a
-      Array of pointers to input matrices ``A`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      Array of ``group_count`` integers. ``lda[i]`` specifies the
-      leading dimension of ``A`` for every matrix in group ``i``. All
-      entries must be positive and at least ``m`` if column major
-      layout is used or at least ``n`` if row major layout is used.
-             
-   x
-      Array of pointers to input vectors ``X`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   incx
-      Array of ``group_count`` integers. ``incx[i]`` specifies the
-      stride of ``X`` for every vector in group ``i``. All
-      entries must be positive.
-             
-   beta
-      Array of ``group_count`` scalar elements. ``beta[i]`` specifies
-      the scaling factor for vector ``Y`` for every vector in group
-      ``i``.
-
-   y
-      Array of pointers to input/output vectors ``Y`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   incy
-      Array of ``group_count`` integers. ``incy[i]`` specifies the
-      stride between two consecutive elements of ``Y`` for every
-      vector in group ``i``. It is an element stride, not a
-      leading dimension, so it is independent of ``m[i]`` and
-      ``n[i]``. All entries must be positive.
-
-   group_count
-      Specifies the number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the
-      number of matrix-vector products in group ``i``. All entries must be at least 0.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Overwritten by vector calculated by 
-      (``alpha[i]`` * op(``A``) * ``X`` + ``beta[i]`` * ``Y``) for group ``i``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gemv_batch(sycl::queue &queue,
-                              onemkl::transpose trans,
-                              std::int64_t m,
-                              std::int64_t n,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T beta,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gemv_batch(sycl::queue &queue,
-                              onemkl::transpose trans,
-                              std::int64_t m,
-                              std::int64_t n,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              const T *x,
-                              std::int64_t incx,
-                              std::int64_t stridex,
-                              T beta,
-                              T *y,
-                              std::int64_t incy,
-                              std::int64_t stridey,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   trans
-      Specifies op(``A``) the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of op(``A``). Must be at least zero.
-
-   n
-      Number of columns of op(``A``). Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector products.
-
-   a
-      Pointer to the input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive
-      and at least ``m`` if column major layout is used or at least
-      ``n`` if row major layout is used.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   x
-      Pointer to the input vectors ``X`` with size ``stridex`` * ``batch_size``.
-
-   incx
-      Stride of the vector ``X``. It must be positive.
-
-   stridex
-      Stride between different consecutive ``X`` vectors, must be at least 0.
-
-   beta
-      Scaling factor for the vector ``Y``.
-
-   y
-      Pointer to the input/output vectors ``Y`` with size ``stridey`` * ``batch_size``.
-
-   incy
-      Stride between two consecutive elements of the ``y`` vectors.
-
-   stridey
-      Stride between two consecutive ``Y`` vectors. Must be at least
-      (1 + (len-1)*abs(incy)) where ``len`` is ``m`` if the matrix ``A``
-      is not transposed or ``n`` otherwise.
-
-   batch_size
-      Specifies the number of matrix-vector operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Output overwritten by ``batch_size`` matrix-vector product
-      operations of the form ``alpha`` * op(``A``) * ``X`` + ``beta`` * ``Y``.
-
-.. container:: section
-      
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/ger.rst b/docs/domains/blas/ger.rst
deleted file mode 100644
index ea128414d..000000000
--- a/docs/domains/blas/ger.rst
+++ /dev/null
@@ -1,226 +0,0 @@
-.. _onemkl_blas_ger:
-
-ger
-===
-
-Computes a rank-1 update of a general matrix.
-
-.. _onemkl_blas_ger_description:
-
-.. rubric:: Description
-
-The ``ger`` routines compute a scalar-vector-vector product and add the
-result to a general matrix. The operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*y^T + A
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``m``-by-``n`` matrix,
-
-``x`` is a vector of length ``m``,
-
-``y`` is a vector of length ``n``.
-
-``ger`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_ger_buffer:
-
-ger (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void ger(sycl::queue &queue,
-                std::int64_t m,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void ger(sycl::queue &queue,
-                std::int64_t m,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``m`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n`` if column major layout is used or at least ``lda``\ \*\ ``m``
-      if row major layout is used. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated matrix ``A``.
-
-
-.. _onemkl_blas_ger_usm:
-
-ger (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event ger(sycl::queue &queue,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       const T *y,
-                       std::int64_t incy,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event ger(sycl::queue &queue,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       const T *y,
-                       std::int64_t incy,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``m`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n`` if column major layout is used or at least ``lda``\ \*\ ``m``
-      if row major layout is used. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated matrix ``A``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/gerc.rst b/docs/domains/blas/gerc.rst
deleted file mode 100644
index 8a8c28463..000000000
--- a/docs/domains/blas/gerc.rst
+++ /dev/null
@@ -1,227 +0,0 @@
-.. _onemkl_blas_gerc:
-
-gerc
-====
-
-Computes a rank-1 update (conjugated) of a general complex matrix.
-
-.. _onemkl_blas_gerc_description:
-
-.. rubric:: Description
-
-The ``gerc`` routines compute a scalar-vector-vector product and add the
-result to a general matrix. The operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*y^H + A
-
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``m``-by-``n`` matrix,
-
-``x`` is a vector of length ``m``,
-
-``y`` is a vector of length ``n``.
-
-``gerc`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_gerc_buffer:
-
-gerc (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void gerc(sycl::queue &queue,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void gerc(sycl::queue &queue,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``m`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n`` if column major layout is used or at least ``lda``\ \*\ ``m``
-      if row major layout is used. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated matrix ``A``.
-
-
-.. _onemkl_blas_gerc_usm:
-
-gerc (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event gerc(sycl::queue &queue,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event gerc(sycl::queue &queue,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to the input vector ``x``. The array holding input
-      vector ``x`` must be of size at least (1 + (``m`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array holding input
-      vector ``y`` must be of size at least (1 + (``n`` -
-      1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A``\ must have size at least ``lda``\ \*\ ``n`` if column
-      major layout is used or at least ``lda``\ \*\ ``m`` if row
-      major layout is used. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated matrix ``A``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/geru.rst b/docs/domains/blas/geru.rst
deleted file mode 100644
index 8e22b3ba9..000000000
--- a/docs/domains/blas/geru.rst
+++ /dev/null
@@ -1,227 +0,0 @@
-.. _onemkl_blas_geru:
-
-geru
-====
-
-Computes a rank-1 update (unconjugated) of a general complex matrix.
-
-.. _onemkl_blas_geru_description:
-
-.. rubric:: Description
-
-The ``geru`` routines compute a scalar-vector-vector product and
-add the result to a general matrix. The operation is defined as:
-
-.. math::
-      
-      A \leftarrow alpha*x*y^T + A
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``m``-by-``n`` matrix,
-
-``x`` is a vector of length ``m``,
-
-``y`` is a vector of length ``n``.
-
-``geru`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_geru_buffer:
-
-geru (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void geru(sycl::queue &queue,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void geru(sycl::queue &queue,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``m`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n`` if column major layout is used or at least ``lda``\ \*\ ``m``
-      if row major layout is used. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at least
-      ``m`` if column major layout is used or at least ``n`` if row
-      major layout is used.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated matrix ``A``.
-
-
-.. _onemkl_blas_geru_usm:
-
-geru (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event geru(sycl::queue &queue,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event geru(sycl::queue &queue,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   m
-      Number of rows of ``A``. Must be at least zero.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to the input vector ``x``. The array holding input
-      vector ``x`` must be of size at least (1 + (``m`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input
-      vector ``y`` must be of size at least (1 + (``n`` -
-      1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n`` if column
-      major layout is used or at least ``lda``\ \*\ ``m`` if row
-      major layout is used. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be positive and at
-      least ``m`` if column major layout is used or at least ``n``
-      if row major layout is used.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated matrix ``A``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/hbmv.rst b/docs/domains/blas/hbmv.rst
deleted file mode 100644
index a86e7fef8..000000000
--- a/docs/domains/blas/hbmv.rst
+++ /dev/null
@@ -1,245 +0,0 @@
-.. _onemkl_blas_hbmv:
-
-hbmv
-====
-
-Computes a matrix-vector product using a Hermitian band matrix.
-
-.. _onemkl_blas_hbmv_description:
-
-.. rubric:: Description
-
-The ``hbmv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a Hermitian band matrix. The
-operation is defined as
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` Hermitian band matrix, with ``k``
-super-diagonals,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``hbmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hbmv_buffer:
-
-hbmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` + 1),
-      and positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_hbmv_usm:
-
-hbmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to the input matrix ``A``. The array holding input
-      matrix ``A`` must have size at least ``lda``\ \*\ ``n``. See
-      :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` +
-      1), and positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/hemm.rst b/docs/domains/blas/hemm.rst
deleted file mode 100644
index 4098cd75b..000000000
--- a/docs/domains/blas/hemm.rst
+++ /dev/null
@@ -1,315 +0,0 @@
-.. _onemkl_blas_hemm:
-
-hemm
-====
-
-Computes a matrix-matrix product where one input matrix is Hermitian
-and one is general.
-
-.. _onemkl_blas_hemm_description:
-
-.. rubric:: Description
-
-The ``hemm`` routines compute a scalar-matrix-matrix product and add the
-result to a scalar-matrix product, where one of the matrices in the
-multiplication is Hermitian. The argument ``left_right`` determines
-if the Hermitian matrix, ``A``, is on the left of the multiplication
-(``left_right`` = ``side::left``) or on the right (``left_right`` =
-``side::right``). Depending on ``left_right``, the operation is
-defined as:
-
-.. math::
-
-      C \leftarrow alpha*A*B + beta*C
-
-or
-
-.. math::
-
-      C \leftarrow alpha*B*A + beta*C
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is a Hermitian matrix, either ``m``-by-``m`` (``side::left``) or
-``n``-by-``n`` (``side::right``),
-
-``B`` and ``C`` are ``m``-by-``n`` matrices.
-
-``hemm`` supports the following precisions:
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hemm_buffer:
-
-hemm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hemm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hemm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the multiplication
-      (``side::left``) or on the right side (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether ``A``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of the matrix ``B`` and ``C``.
-
-      The value of ``m`` must be at least zero.
-
-   n
-      Specifies the number of columns of the matrix ``B`` and ``C``.
-
-      The value of ``n`` must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``A`` is on the left of the multiplication,
-      or ``lda``\ \*\ ``n`` if ``A`` is on the right. See :ref:`matrix-storage`
-      for more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if ``A`` is on
-      the left of the multiplication, or at least ``n`` if ``A`` is on
-      the right. Must be positive.
-
-   b
-      Buffer holding input matrix ``B``. Must have size at least
-      ``ldb``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldb``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The buffer holding the input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by ``alpha``\ \*\ ``A``\ \*\ ``B`` +
-      ``beta``\ \*\ ``C`` (``left_right`` = ``side::left``) or
-      ``alpha``\ \*\ ``B``\ \*\ ``A`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::right``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized before
-   calling ``hemm``.
-
-      
-
-.. _onemkl_blas_hemm_usm:
-
-hemm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hemm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        const T* b,
-                        std::int64_t ldb,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hemm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        const T* b,
-                        std::int64_t ldb,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the
-      multiplication (``side::left``) or on the right side
-      (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether ``A``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of the matrix ``B`` and ``C``.
-
-      The value of ``m`` must be at least zero.
-
-   n
-      Specifies the number of columns of the matrix ``B`` and ``C``.
-
-      The value of ``n`` must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Pointer to input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``A`` is on the left of the
-      multiplication, or ``lda``\ \*\ ``n`` if ``A`` is on the right.
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if ``A`` is
-      on the left of the multiplication, or at least ``n`` if ``A``
-      is on the right. Must be positive.
-
-   b
-      Pointer to input matrix ``B``. Must have size at least
-      ``ldb``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldb``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The pointer to input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-      
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*\ ``A``\ \*\ ``B`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::left``) or
-      ``alpha``\ \*\ ``B``\ \*\ ``A`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::right``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``hemm``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/hemv.rst b/docs/domains/blas/hemv.rst
deleted file mode 100644
index cc959ba95..000000000
--- a/docs/domains/blas/hemv.rst
+++ /dev/null
@@ -1,232 +0,0 @@
-.. _onemkl_blas_hemv:
-
-hemv
-====
-
-Computes a matrix-vector product using a Hermitian matrix.
-
-.. _onemkl_blas_hemv_description:
-
-.. rubric:: Description
-
-The ``hemv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a Hermitian matrix. The
-operation is defined as
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y 
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``hemv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hemv_buffer:
-
-hemv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hemv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hemv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether *A* is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-      
-
-.. _onemkl_blas_hemv_usm:
-
-hemv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hemv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hemv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether *A* is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/her.rst b/docs/domains/blas/her.rst
deleted file mode 100644
index ed243f62d..000000000
--- a/docs/domains/blas/her.rst
+++ /dev/null
@@ -1,205 +0,0 @@
-.. _onemkl_blas_her:
-
-her
-===
-
-Computes a rank-1 update of a Hermitian matrix.
-
-.. _onemkl_blas_her_description:
-
-.. rubric:: Description
-
-The ``her`` routines compute a scalar-vector-vector product and add the
-result to a Hermitian matrix. The operation is defined as:
-
-.. math::
-      
-      A \leftarrow alpha*x*x^H + A
-
-where:
-
-``alpha`` is scalar,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix,
-
-``x`` is a vector of length ``n``.
-
-``her`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_her_buffer:
-
-her (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void her(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void her(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\  \= ``upper`` or the updated
-      lower triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \ =\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-
-.. _onemkl_blas_her_usm:
-
-her (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event her(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event her(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether *A* is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper`` or the updated
-      lower triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/her2.rst b/docs/domains/blas/her2.rst
deleted file mode 100644
index f9272adae..000000000
--- a/docs/domains/blas/her2.rst
+++ /dev/null
@@ -1,231 +0,0 @@
-.. _onemkl_blas_her2:
-
-her2
-====
-
-Computes a rank-2 update of a Hermitian matrix.
-
-.. _onemkl_blas_her2_description:
-
-.. rubric:: Description
-
-The ``her2`` routines compute two scalar-vector-vector products and add
-them to a Hermitian matrix. The operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*y^H + conjg(alpha)*y*x^H + A
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``her2`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_her2_buffer:
-
-her2 (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void her2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void her2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated
-      lower triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-      
-
-.. _onemkl_blas_her2_usm:
-
-her2 (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event her2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event her2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding
-      input vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated
-      lower triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/her2k.rst b/docs/domains/blas/her2k.rst
deleted file mode 100644
index 6f03052da..000000000
--- a/docs/domains/blas/her2k.rst
+++ /dev/null
@@ -1,397 +0,0 @@
-.. _onemkl_blas_her2k:
-
-her2k
-=====
-
-Performs a Hermitian rank-2k update.
-
-.. _onemkl_blas_her2k_description:
-
-.. rubric:: Description
-
-The ``her2k`` routines perform a rank-2k update of an ``n`` x ``n``
-Hermitian matrix ``C`` by general matrices ``A`` and ``B``. 
-
-If ``trans`` = ``transpose::nontrans``, the operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*A*B^H + conjg(alpha)*B*A^H + beta*C
-
-where ``A`` is ``n`` x ``k`` and ``B`` is ``k`` x ``n``.
-
-If ``trans`` = ``transpose::conjtrans``, the operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*B*A^H + conjg(alpha)*A*B^H + beta*C
-
-where ``A`` is ``k`` x ``n`` and ``B`` is ``n`` x ``k``.
-
-In both cases:
-
-``alpha`` is a complex scalar and ``beta`` is a real scalar.
-
-``C`` is a Hermitian matrix and ``A`` , ``B`` are general matrices.
-
-The inner dimension of both matrix multiplications is ``k``.
-
-``her2k`` supports the following precisions:
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_real 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_her2k_buffer:
-
-her2k (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void her2k(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose trans,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T_real beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void her2k(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose trans,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T_real beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies the operation to apply, as described above. Supported
-      operations are ``transpose::nontrans`` and
-      ``transpose::conjtrans``.
-
-   n
-      The number of rows and columns in ``C``. The value of ``n`` must
-      be at least zero.
-
-   k
-      The inner dimension of matrix multiplications. The value of ``k``
-      must be at least equal to zero.
-
-   alpha
-      Complex scaling factor for the rank-2k update.
-
-   a
-      Buffer holding input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-                
-   b
-      Buffer holding input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``
-         * - Row major
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-
-      See :ref:`matrix-storage`
-      for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-             
-   beta
-      Real scaling factor for matrix ``C``.
-      
-   c
-      Buffer holding input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by the updated ``C`` matrix.
-
-
-.. _onemkl_blas_her2k_usm:
-
-her2k (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event her2k(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose trans,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T_real beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event her2k(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose trans,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T_real beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies the operation to apply, as described above. Supported
-      operations are ``transpose::nontrans`` and
-      ``transpose::conjtrans``.
-
-   n
-      The number of rows and columns in ``C``. The value of ``n``
-      must be at least zero.
-
-   k
-      The inner dimension of matrix multiplications. The value of
-      ``k`` must be at least equal to zero.
-
-   alpha
-      Complex scaling factor for the rank-2k update.
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-   
-   b
-      Pointer to input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``
-         * - Row major
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-   
-      See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-         * - Row major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-
-   beta
-      Real scaling factor for matrix ``C``.
-
-   c
-      Pointer to input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by the updated ``C``
-      matrix.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/herk.rst b/docs/domains/blas/herk.rst
deleted file mode 100644
index e4d4d16a9..000000000
--- a/docs/domains/blas/herk.rst
+++ /dev/null
@@ -1,309 +0,0 @@
-.. _onemkl_blas_herk:
-
-herk
-====
-
-Performs a Hermitian rank-k update.
-
-.. _onemkl_blas_herk_description:
-
-.. rubric:: Description
-
-The ``herk`` routines compute a rank-k update of a Hermitian matrix
-``C`` by a general matrix ``A``. The operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*op(A)*op(A)^H + beta*C
-
-where:
-
-op(``X``) is one of op(``X``) = ``X`` or op(``X``) = ``X``\ :sup:`H`,
-
-``alpha`` and ``beta`` are real scalars,
-
-``C`` is a Hermitian matrix and ``A`` is a general matrix.
-
-Here op(``A``) is ``n`` x ``k``, and ``C`` is ``n`` x ``n``.
-
-``herk`` supports the following precisions:
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_real 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_herk_buffer:
-
-herk (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void herk(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T_real alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 T_real beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void herk(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T_real alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 T_real beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See
-      :ref:`onemkl_datatypes` for more
-      details. Supported operations are ``transpose::nontrans`` and
-      ``transpose::conjtrans``.
-
-   n
-      The number of rows and columns in ``C``. The value of ``n`` must be
-      at least zero.
-
-   k
-      Number of columns in op(``A``).
-
-      The value of ``k`` must be at least zero.
-
-   alpha
-      Real scaling factor for the rank-k update.
-
-   a
-      Buffer holding input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   beta
-      Real scaling factor for matrix ``C``.
-
-   c
-      Buffer holding input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      The output buffer, overwritten by
-      ``alpha``\ \*op(``A``)*op(``A``)\ :sup:`H` + ``beta``\ \*\ ``C``.
-      The imaginary parts of the diagonal elements are set to zero.
-
-      
-
-.. _onemkl_blas_herk_usm:
-
-herk (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event herk(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T_real alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T_real beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event herk(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T_real alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T_real beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details. Supported operations are ``transpose::nontrans``
-      and ``transpose::conjtrans``.
-
-   n
-      The number of rows and columns in ``C``. The value of ``n`` must
-      be at least zero.
-
-   k
-      Number of columns in op(``A``).
-
-      The value of ``k`` must be at least zero.
-
-   alpha
-      Real scaling factor for the rank-k update.
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   beta
-      Real scaling factor for matrix ``C``.
-
-   c
-      Pointer to input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*op(``A``)*op(``A``)\ :sup:`H` +
-      ``beta``\ \*\ ``C``. The imaginary parts of the diagonal
-      elements are set to zero.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/hpmv.rst b/docs/domains/blas/hpmv.rst
deleted file mode 100644
index 17872b2b2..000000000
--- a/docs/domains/blas/hpmv.rst
+++ /dev/null
@@ -1,228 +0,0 @@
-.. _onemkl_blas_hpmv:
-
-hpmv
-====
-
-Computes a matrix-vector product using a Hermitian packed matrix.
-
-.. _onemkl_blas_hpmv_description:
-
-.. rubric:: Description
-
-The ``hpmv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a Hermitian packed matrix.
-The operation is defined as
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix supplied in packed form,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``hpmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hpmv_buffer:
-
-hpmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hpmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hpmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``\ +1))/2. See :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set and
-      are assumed to be zero.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-      
-
-.. _onemkl_blas_hpmv_usm:
-
-hpmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hpmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hpmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``\ +1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set
-      and are assumed to be zero.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/hpr.rst b/docs/domains/blas/hpr.rst
deleted file mode 100644
index a7fd49cde..000000000
--- a/docs/domains/blas/hpr.rst
+++ /dev/null
@@ -1,201 +0,0 @@
-.. _onemkl_blas_hpr:
-
-hpr
-===
-
-Computes a rank-1 update of a Hermitian packed matrix.
-
-.. _onemkl_blas_hpr_description:
-
-.. rubric:: Description
-
-The ``hpr`` routines compute a scalar-vector-vector product and add the
-result to a Hermitian packed matrix. The operation is defined as
-
-.. math::
-
-      A \leftarrow alpha*x*x^H + A
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix, supplied in packed form,
-
-``x`` is a vector of length ``n``.
-
-``hpr`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hpr_buffer:
-
-hpr (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hpr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hpr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the vector-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``\ +1))/2. See :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set and
-      are assumed to be zero.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-
-.. _onemkl_blas_hpr_usm:
-
-hpr (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hpr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hpr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the vector-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``\ +1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set and
-      are assumed to be zero.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/hpr2.rst b/docs/domains/blas/hpr2.rst
deleted file mode 100644
index 3e3fa4a63..000000000
--- a/docs/domains/blas/hpr2.rst
+++ /dev/null
@@ -1,226 +0,0 @@
-.. _onemkl_blas_hpr2:
-
-hpr2
-====
-
-Performs a rank-2 update of a Hermitian packed matrix.
-
-.. _onemkl_blas_hpr2_description:
-
-.. rubric:: Description
-
-The ``hpr2`` routines compute two scalar-vector-vector products and add
-them to a Hermitian packed matrix. The operation is defined as
-
-.. math::
-
-      A \leftarrow alpha*x*y^H + conjg(alpha)*y*x^H + A
-
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``n``-by-``n`` Hermitian matrix, supplied in packed form,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``hpr2`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_hpr2_buffer:
-
-hpr2 (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void hpr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void hpr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-2 update.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``+1))/2. See :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set and
-      are assumed to be zero.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-      
-
-.. _onemkl_blas_hpr2_usm:
-
-hpr2 (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event hpr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event hpr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-2 update.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``+1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-      The imaginary parts of the diagonal elements need not be set
-      and are assumed to be zero.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the Hermitian
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the Hermitian matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      The imaginary parts of the diagonal elements are set to zero.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/iamax.rst b/docs/domains/blas/iamax.rst
deleted file mode 100644
index ce02af8de..000000000
--- a/docs/domains/blas/iamax.rst
+++ /dev/null
@@ -1,167 +0,0 @@
-.. _onemkl_blas_iamax:
-
-iamax
-=====
-
-Finds the index of the element with the largest absolute value in a vector.
-
-.. _onemkl_blas_iamax_description:
-
-.. rubric:: Description
-
-The ``iamax`` routines return an index ``i`` such that ``x[i]``
-has the maximum absolute value of all elements in vector ``x`` (real
-variants), or such that (\|Re(``x[i]``)\| + \|Im(``x[i]``)\|) is maximal
-(complex variants).
-
-If either ``n`` or ``incx`` is not positive, the routine returns
-``0``.
-
-If more than one vector element is found with the same largest
-absolute value, the index of the first one encountered is returned.
-
-If the vector contains ``NaN`` values, then the routine returns the
-index of the first ``NaN``.
-
-``iamax`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. container:: Note
-
-   .. rubric:: Note
-      :class: NoteTipHead
-
-   The index is zero-based.
-
-.. _onemkl_blas_iamax_buffer:
-
-iamax (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void iamax(sycl::queue &queue,
-                  std::int64_t n,
-                  sycl::buffer<T,
-                  1> &x,
-                  std::int64_t incx,
-                  sycl::buffer<std::int64_t,
-                  1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void iamax(sycl::queue &queue,
-                  std::int64_t n,
-                  sycl::buffer<T,
-                  1> &x,
-                  std::int64_t incx,
-                  sycl::buffer<std::int64_t,
-                  1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      The number of elements in vector ``x``.
-
-   x
-      The buffer that holds the input vector ``x``. The buffer must be
-      of size at least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage`
-      for more details.
-
-   incx
-      The stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      The buffer where the zero-based index ``i`` of the maximal element
-      is stored.
-
-
-.. _onemkl_blas_iamax_usm:
-
-iamax (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event iamax(sycl::queue &queue,
-                         std::int64_t n,
-                         const T *x,
-                         std::int64_t incx,
-                         std::int64_t *result,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event iamax(sycl::queue &queue,
-                         std::int64_t n,
-                         const T *x,
-                         std::int64_t incx,
-                         std::int64_t *result,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      The number of elements in vector ``x``.
-
-   x
-      The pointer to the input vector ``x``. The array holding the
-      input vector ``x`` must be of size at least (1 + (``n`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      The stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      The pointer to where the zero-based index ``i`` of the maximal
-      element is stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/iamin.rst b/docs/domains/blas/iamin.rst
deleted file mode 100644
index fb724cc47..000000000
--- a/docs/domains/blas/iamin.rst
+++ /dev/null
@@ -1,160 +0,0 @@
-.. _onemkl_blas_iamin:
-
-iamin
-=====
-
-Finds the index of the element with the smallest absolute value.
-
-.. _onemkl_blas_iamin_description:
-
-.. rubric:: Description
-
-The ``iamin`` routines return an index ``i`` such that ``x[i]`` has
-the minimum absolute value of all elements in vector ``x`` (real
-variants), or such that (\|Re(``x[i]``)\| + \|Im(``x[i]``)\|) is minimal
-(complex variants).
-
-If either ``n`` or ``incx`` is not positive, the routine returns
-``0``.
-
-If more than one vector element is found with the same smallest
-absolute value, the index of the first one encountered is returned.
-
-If the vector contains ``NaN`` values, then the routine returns the
-index of the first ``NaN``.
-
-``iamin`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. container:: Note
-
-   .. rubric:: Note
-      :class: NoteTipHead
-
-   The index is zero-based.
-
-.. _onemkl_blas_iamin_buffer:
-
-iamin (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void iamin(sycl::queue &queue,
-                  std::int64_t n,
-                  sycl::buffer<T,1> &x,
-                  std::int64_t incx,
-                  sycl::buffer<std::int64_t,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void iamin(sycl::queue &queue,
-                  std::int64_t n,
-                  sycl::buffer<T,1> &x,
-                  std::int64_t incx,
-                  sycl::buffer<std::int64_t,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the zero-based index ``i`` of the minimum element
-      will be stored.
-
-
-.. _onemkl_blas_iamin_usm:
-
-iamin (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event iamin(sycl::queue &queue,
-                         std::int64_t n,
-                         const T *x,
-                         std::int64_t incx,
-                         std::int64_t *result,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event iamin(sycl::queue &queue,
-                         std::int64_t n,
-                         const T *x,
-                         std::int64_t incx,
-                         std::int64_t *result,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      The pointer to input vector ``x``. The array holding input
-      vector ``x`` must be of size at least (1 + (``n`` -
-      1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to where the zero-based index ``i`` of the minimum
-      element will be stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/nrm2.rst b/docs/domains/blas/nrm2.rst
deleted file mode 100644
index 879862c73..000000000
--- a/docs/domains/blas/nrm2.rst
+++ /dev/null
@@ -1,158 +0,0 @@
-.. _onemkl_blas_nrm2:
-
-nrm2
-====
-
-Computes the Euclidean norm of a vector.
-
-.. _onemkl_blas_nrm2_description:
-
-.. rubric:: Description
-
-The ``nrm2`` routines compute the Euclidean norm of a vector:
-
-.. math:: 
-   
-      result = \| x\|   
-
-where:
-
-``x`` is a vector of ``n`` elements.
-
-``nrm2`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_res 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_nrm2_buffer:
-
-nrm2 (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void nrm2(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T_res,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void nrm2(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T_res,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the Euclidean norm of the vector ``x`` will be
-      stored.
-
-
-.. _onemkl_blas_nrm2_usm:
-
-nrm2 (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event nrm2(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T_res *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event nrm2(sycl::queue &queue,
-                        std::int64_t n,
-                        const T *x,
-                        std::int64_t incx,
-                        T_res *result,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to where the Euclidean norm of the vector ``x`` will be
-      stored.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/rot.rst b/docs/domains/blas/rot.rst
deleted file mode 100644
index 736db3b4d..000000000
--- a/docs/domains/blas/rot.rst
+++ /dev/null
@@ -1,208 +0,0 @@
-.. _onemkl_blas_rot:
-
-rot
-===
-
-Performs rotation of points in the plane.
-
-.. _onemkl_blas_rot_description:
-
-.. rubric:: Description
-
-Given two vectors ``x`` and ``y`` of ``n`` elements, the ``rot`` routines
-compute four scalar-vector products and update the input vectors with
-the sum of two of these scalar-vector products as follows:
-
-.. math::
-  
-   \left[\begin{array}{c}
-      x\\y
-   \end{array}\right]
-   \leftarrow
-   \left[\begin{array}{c}
-      \phantom{-}c*x + s*y\\
-      -s*x + c*y
-   \end{array}\right]
-
-``rot`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_scalar 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_rot_buffer:
-
-rot (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void rot(sycl::queue &queue,
-                std::int64_t n,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                T_scalar c,
-                T_scalar s)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void rot(sycl::queue &queue,
-                std::int64_t n,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &y,
-                std::int64_t incy,
-                T_scalar c,
-                T_scalar s)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   c
-      Scaling factor.
-
-   s
-      Scaling factor.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-   y
-      Buffer holding the updated vector ``y``.
-
-      
-
-.. _onemkl_blas_rot_usm:
-
-rot (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event rot(sycl::queue &queue,
-                       std::int64_t n,
-                       T *x,
-                       std::int64_t incx,
-                       T *y,
-                       std::int64_t incy,
-                       T_scalar c,
-                       T_scalar s,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event rot(sycl::queue &queue,
-                       std::int64_t n,
-                       T *x,
-                       std::int64_t incx,
-                       T *y,
-                       std::int64_t incy,
-                       T_scalar c,
-                       T_scalar s,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   c
-      Scaling factor.
-
-   s
-      Scaling factor.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated vector ``x``.
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/rotg.rst b/docs/domains/blas/rotg.rst
deleted file mode 100644
index 18a065f0e..000000000
--- a/docs/domains/blas/rotg.rst
+++ /dev/null
@@ -1,175 +0,0 @@
-.. _onemkl_blas_rotg:
-
-rotg
-====
-
-Computes the parameters for a Givens rotation.
-
-.. _onemkl_blas_rotg_description:
-
-.. rubric:: Description
-
-Given the Cartesian coordinates ``(a, b)`` of a point, the ``rotg``
-routines return the parameters ``c``, ``s``, ``r``, and ``z``
-associated with the Givens rotation. The parameters ``c`` and ``s``
-define a unitary matrix such that:
-
-.. math::
-      
-      \begin{bmatrix}c & s \\ -s & c\end{bmatrix}.
-      \begin{bmatrix}a \\ b\end{bmatrix}
-      =\begin{bmatrix}r \\ 0\end{bmatrix} 
-
-The parameter ``z`` is defined such that if \|\ ``a``\ \| >
-\|\ ``b``\ \|, ``z`` is ``s``; otherwise if ``c`` is not 0 ``z`` is
-1/``c``; otherwise ``z`` is 1.
-
-``rotg`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_real 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_rotg_buffer:
-
-rotg (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void rotg(sycl::queue &queue,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &b,
-                 sycl::buffer<T_real,1> &c,
-                 sycl::buffer<T,1> &s)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void rotg(sycl::queue &queue,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &b,
-                 sycl::buffer<T_real,1> &c,
-                 sycl::buffer<T,1> &s)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   a
-      Buffer holding the ``x``-coordinate of the point.
-
-   b
-      Buffer holding the ``y``-coordinate of the point.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the parameter ``r`` associated with the Givens
-      rotation.
-
-   b
-      Buffer holding the parameter ``z`` associated with the Givens
-      rotation.
-
-   c
-      Buffer holding the parameter ``c`` associated with the Givens
-      rotation.
-
-   s
-      Buffer holding the parameter ``s`` associated with the Givens
-      rotation.
-
-
-.. _onemkl_blas_rotg_usm:
-
-rotg (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event rotg(sycl::queue &queue,
-                        T *a,
-                        T *b,
-                        T_real *c,
-                        T *s,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event rotg(sycl::queue &queue,
-                        T *a,
-                        T *b,
-                        T_real *c,
-                        T *s,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   a
-      Pointer to the ``x``-coordinate of the point.
-
-   b
-      Pointer to the ``y``-coordinate of the point.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the parameter ``r`` associated with the Givens
-      rotation.
-
-   b
-      Pointer to the parameter ``z`` associated with the Givens
-      rotation.
-
-   c
-      Pointer to the parameter ``c`` associated with the Givens
-      rotation.
-
-   s
-      Pointer to the parameter ``s`` associated with the Givens
-      rotation.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/rotm.rst b/docs/domains/blas/rotm.rst
deleted file mode 100644
index da9a40c95..000000000
--- a/docs/domains/blas/rotm.rst
+++ /dev/null
@@ -1,266 +0,0 @@
-.. _onemkl_blas_rotm:
-
-rotm
-====
-
-Performs modified Givens rotation of points in the plane.
-
-.. _onemkl_blas_rotm_description:
-
-.. rubric:: Description
-
-Given two vectors ``x`` and ``y``, each vector element of these
-vectors is replaced as follows:
-
-.. math::
-
-      \begin{bmatrix}x_i \\ y_i\end{bmatrix}=
-      H
-      \begin{bmatrix}x_i \\ y_i\end{bmatrix} 
-
-for ``i`` from 1 to ``n``, where ``H`` is a modified Givens
-transformation matrix.
-
-``rotm`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_rotm_buffer:
-
-rotm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void rotm(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &param)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void rotm(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &param)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   param
-      Buffer holding an array of size 5.
-
-      The elements of the ``param`` array are:
-
-      ``param[0]`` contains a switch, ``flag``. The other array elements
-      ``param[1-4]`` contain the components of the modified Givens 
-      transformation matrix ``H``:
-      h\ :sub:`11`, h\ :sub:`21`, h\ :sub:`12`, and
-      h\ :sub:`22`, respectively.
-
-      Depending on the values of ``flag``, the components of ``H``
-      are set as follows:
-
-      | ``flag = -1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & h_{12} \\ h_{21} & h_{22}\end{bmatrix} 
-
-      | ``flag = 0.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & h_{12} \\ h_{21} & 1.0\end{bmatrix} 
-
-      | ``flag = 1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & 1.0 \\ -1.0 & h_{22}\end{bmatrix} 
-
-      | ``flag = -2.0``:
-      
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & 0.0 \\ 0.0 & 1.0\end{bmatrix} 
-
-      In the last three cases, the matrix entries of 1.0, -1.0, and 0.0
-      are assumed based on the value of ``flag`` and are not required to
-      be set in the ``param`` vector.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-   y
-      Buffer holding the updated vector ``y``.
-
-      
-
-.. _onemkl_blas_rotm_usm:
-
-rotm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event rotm(sycl::queue &queue,
-                        std::int64_t n,
-                        T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        T *param,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event rotm(sycl::queue &queue,
-                        std::int64_t n,
-                        T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        T *param,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-   
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to the input vector ``x``. The array holding the vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array holding the vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   param
-      Pointer to an array of size 5.
-
-      The elements of the ``param`` array are:
-
-      ``param[0]`` contains a switch, ``flag``. The other array elements
-      ``param[1-4]`` contain the components of the modified Givens 
-      transformation matrix ``H``:
-      h\ :sub:`11`, h\ :sub:`21`, h\ :sub:`12`, and
-      h\ :sub:`22`, respectively.
-
-      Depending on the values of ``flag``, the components of ``H``
-      are set as follows:
-
-      | ``flag = -1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & h_{12} \\ h_{21} & h_{22}\end{bmatrix} 
-
-      | ``flag = 0.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & h_{12} \\ h_{21} & 1.0\end{bmatrix} 
-
-      | ``flag = 1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & 1.0 \\ -1.0 & h_{22}\end{bmatrix} 
-
-      | ``flag = -2.0``:
-      
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & 0.0 \\ 0.0 & 1.0\end{bmatrix} 
-
-      In the last three cases, the matrix entries of 1.0, -1.0, and 0.0
-      are assumed based on the value of ``flag`` and are not required to
-      be set in the ``param`` vector.
-   
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated array ``x``.
-
-   y
-      Pointer to the updated array ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/rotmg.rst b/docs/domains/blas/rotmg.rst
deleted file mode 100644
index 49a16ff00..000000000
--- a/docs/domains/blas/rotmg.rst
+++ /dev/null
@@ -1,257 +0,0 @@
-.. _onemkl_blas_rotmg:
-
-rotmg
-=====
-
-Computes the parameters for a modified Givens rotation.
-
-.. _onemkl_blas_rotmg_description:
-
-.. rubric:: Description
-
-Given Cartesian coordinates (``x1``, ``y1``) of an
-input vector, the ``rotmg`` routines compute the components of a modified
-Givens transformation matrix ``H`` that zeros the ``y``-component of
-the resulting vector:
-
-.. math::
-
-      \begin{bmatrix}x1 \\ 0\end{bmatrix}=
-      H
-      \begin{bmatrix}x1\sqrt{d1} \\ y1\sqrt{d2}\end{bmatrix} 
-      
-``rotmg`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_rotmg_buffer:
-
-rotmg (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void rotmg(sycl::queue &queue,
-                  sycl::buffer<T,1> &d1,
-                  sycl::buffer<T,1> &d2,
-                  sycl::buffer<T,1> &x1,
-                  sycl::buffer<T,1> &y1,
-                  sycl::buffer<T,1> &param)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void rotmg(sycl::queue &queue,
-                  sycl::buffer<T,1> &d1,
-                  sycl::buffer<T,1> &d2,
-                  sycl::buffer<T,1> &x1,
-                  sycl::buffer<T,1> &y1,
-                  sycl::buffer<T,1> &param)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   d1
-      Buffer holding the scaling factor for the ``x``-coordinate of the
-      input vector.
-
-   d2
-      Buffer holding the scaling factor for the ``y``-coordinate of the
-      input vector.
-
-   x1
-      Buffer holding the ``x``-coordinate of the input vector.
-
-   y1
-      Scalar specifying the ``y``-coordinate of the input vector.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   d1
-      Buffer holding the first diagonal element of the updated matrix.
-
-   d2
-      Buffer holding the second diagonal element of the updated matrix.
-
-   x1
-      Buffer holding the ``x``-coordinate of the rotated vector before
-      scaling.
-
-   param
-      Buffer holding an array of size 5.
-
-      The elements of the ``param`` array are:
-
-      ``param[0]`` contains a switch, ``flag``. The other array elements
-      ``param[1-4]`` contain the components of the modified Givens 
-      transformation matrix ``H``:
-      h\ :sub:`11`, h\ :sub:`21`, h\ :sub:`12`, and
-      h\ :sub:`22`, respectively.
-
-      Depending on the values of ``flag``, the components of ``H`` are
-      set as follows:
-
-      | ``flag = -1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & h_{12} \\ h_{21} & h_{22}\end{bmatrix} 
-
-      | ``flag = 0.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & h_{12} \\ h_{21} & 1.0\end{bmatrix} 
-
-      | ``flag = 1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & 1.0 \\ -1.0 & h_{22}\end{bmatrix} 
-
-      | ``flag = -2.0``:
-      
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & 0.0 \\ 0.0 & 1.0\end{bmatrix} 
-
-      In the last three cases, the matrix entries of 1.0, -1.0, and 0.0
-      are assumed based on the value of ``flag`` and are not required to
-      be set in the ``param`` vector.
-
-      
-
-.. _onemkl_blas_rotmg_usm:
-
-rotmg (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event rotmg(sycl::queue &queue,
-                         T *d1,
-                         T *d2,
-                         T *x1,
-                         T *y1,
-                         T *param,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event rotmg(sycl::queue &queue,
-                         T *d1,
-                         T *d2,
-                         T *x1,
-                         T *y1,
-                         T *param,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   d1
-      Pointer to the scaling factor for the ``x``-coordinate of the
-      input vector.
-
-   d2
-      Pointer to the scaling factor for the ``y``-coordinate of the
-      input vector.
-
-   x1
-      Pointer to the ``x``-coordinate of the input vector.
-
-   y1
-      Scalar specifying the ``y``-coordinate of the input vector.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   d1
-      Pointer to the first diagonal element of the updated matrix.
-
-   d2
-      Pointer to the second diagonal element of the updated matrix.
-
-   x1
-      Pointer to the ``x``-coordinate of the rotated vector before
-      scaling.
-
-   param
-      Pointer to an array of size 5.
-
-      The elements of the ``param`` array are:
-
-      ``param[0]`` contains a switch, ``flag``. The other array elements
-      ``param[1-4]`` contain the components of the modified Givens 
-      transformation matrix ``H``:
-      h\ :sub:`11`, h\ :sub:`21`, h\ :sub:`12`, and
-      h\ :sub:`22`, respectively.
-
-      Depending on the values of ``flag``, the components of ``H``
-      are set as follows:
-
-      | ``flag = -1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & h_{12} \\ h_{21} & h_{22}\end{bmatrix} 
-
-      | ``flag = 0.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & h_{12} \\ h_{21} & 1.0\end{bmatrix} 
-
-      | ``flag = 1.0``:
-
-      .. math::
-   
-         H=\begin{bmatrix}h_{11} & 1.0 \\ -1.0 & h_{22}\end{bmatrix} 
-
-      | ``flag = -2.0``:
-      
-      .. math::
-   
-         H=\begin{bmatrix}1.0 & 0.0 \\ 0.0 & 1.0\end{bmatrix} 
-
-      In the last three cases, the matrix entries of 1.0, -1.0, and 0.0
-      are assumed based on the value of ``flag`` and are not required to
-      be set in the ``param`` vector.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/sbmv.rst b/docs/domains/blas/sbmv.rst
deleted file mode 100644
index a0c071f3c..000000000
--- a/docs/domains/blas/sbmv.rst
+++ /dev/null
@@ -1,244 +0,0 @@
-.. _onemkl_blas_sbmv:
-
-sbmv
-====
-
-Computes a matrix-vector product with a symmetric band matrix.
-
-.. _onemkl_blas_sbmv_description:
-
-.. rubric:: Description
-
-The ``sbmv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a symmetric band matrix. The
-operation is defined as:
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` symmetric matrix with ``k``
-super-diagonals,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``sbmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_sbmv_buffer:
-
-sbmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void sbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void sbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` + 1),
-      and positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_sbmv_usm:
-
-sbmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event sbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event sbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of super-diagonals of the matrix ``A``. Must be at least
-      zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` +
-      1), and positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/scal.rst b/docs/domains/blas/scal.rst
deleted file mode 100644
index 155b5fc49..000000000
--- a/docs/domains/blas/scal.rst
+++ /dev/null
@@ -1,162 +0,0 @@
-.. _onemkl_blas_scal:
-
-scal
-====
-
-Computes the product of a vector by a scalar.
-
-.. _onemkl_blas_scal_description:
-
-.. rubric:: Description
-
-The ``scal`` routines compute a scalar-vector product:
-
-.. math::
-
-      x \leftarrow alpha*x
-
-where:
-
-``x`` is a vector of ``n`` elements,
-
-``alpha`` is a scalar.
-
-``scal`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-        -  T_scalar 
-      * -  ``float`` 
-        -  ``float`` 
-      * -  ``double`` 
-        -  ``double`` 
-      * -  ``std::complex<float>`` 
-        -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-        -  ``std::complex<double>`` 
-      * -  ``std::complex<float>`` 
-        -  ``float`` 
-      * -  ``std::complex<double>`` 
-        -  ``double`` 
-
-.. _onemkl_blas_scal_buffer:
-
-scal (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void scal(sycl::queue &queue,
-                 std::int64_t n,
-                 T_scalar alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void scal(sycl::queue &queue,
-                 std::int64_t n,
-                 T_scalar alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   alpha
-      Specifies the scalar ``alpha``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-
-.. _onemkl_blas_scal_usm:
-
-scal (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event scal(sycl::queue &queue,
-                        std::int64_t n,
-                        T_scalar alpha,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event scal(sycl::queue &queue,
-                        std::int64_t n,
-                        T_scalar alpha,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   alpha
-      Specifies the scalar ``alpha``.
-
-   x
-      Pointer to the input vector ``x``. The array must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated array ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/sdsdot.rst b/docs/domains/blas/sdsdot.rst
deleted file mode 100644
index 34d939c2f..000000000
--- a/docs/domains/blas/sdsdot.rst
+++ /dev/null
@@ -1,172 +0,0 @@
-.. _onemkl_blas_sdsdot:
-
-sdsdot
-======
-
-Computes a vector-vector dot product with double precision.
-
-.. _onemkl_blas_sdsdot_description:
-
-.. rubric:: Description
-
-The ``sdsdot`` routines perform a dot product between two vectors with
-double precision:
-
-.. math::
-
-   result = sb + \sum_{i=1}^{n}X_iY_i
-
-.. _onemkl_blas_sdsdot_buffer:
-
-sdsdot (Buffer Version)
------------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void sdsdot(sycl::queue &queue,
-                   std::int64_t n,
-                   float sb,
-                   sycl::buffer<float,1> &x,
-                   std::int64_t incx,
-                   sycl::buffer<float,1> &y,
-                   std::int64_t incy,
-                   sycl::buffer<float,1> &result)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void sdsdot(sycl::queue &queue,
-                   std::int64_t n,
-                   float sb,
-                   sycl::buffer<float,1> &x,
-                   std::int64_t incx,
-                   sycl::buffer<float,1> &y,
-                   std::int64_t incy,
-                   sycl::buffer<float,1> &result)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   sb
-      Single precision scalar to be added to the dot product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size
-      at least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size
-      at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Buffer where the result (a scalar) will be stored. If ``n`` < 0
-      the result is ``sb``.
-
-
-.. _onemkl_blas_sdsdot_usm:
-
-sdsdot (USM Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event sdsdot(sycl::queue &queue,
-                          std::int64_t n,
-                          float sb,
-                          const float *x,
-                          std::int64_t incx,
-                          const float *y,
-                          std::int64_t incy,
-                          float *result,
-                          const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event sdsdot(sycl::queue &queue,
-                          std::int64_t n,
-                          float sb,
-                          const float *x,
-                          std::int64_t incx,
-                          const float *y,
-                          std::int64_t incy,
-                          float *result,
-                          const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vectors ``x`` and ``y``.
-
-   sb
-      Single precision scalar to be added to the dot product.
-
-   x
-      Pointer to the input vector ``x``. The array must be of size
-      at least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage`
-      for more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array must be of size
-      at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if
-      any. If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   result
-      Pointer to where the result (a scalar) will be stored. If
-      ``n`` < 0 the result is ``sb``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/spmv.rst b/docs/domains/blas/spmv.rst
deleted file mode 100644
index 3ae196901..000000000
--- a/docs/domains/blas/spmv.rst
+++ /dev/null
@@ -1,220 +0,0 @@
-.. _onemkl_blas_spmv:
-
-spmv
-====
-
-Computes a matrix-vector product with a symmetric packed matrix.
-
-.. _onemkl_blas_spmv_description:
-
-.. rubric:: Description
-
-The ``spmv`` routines compute a scalar-matrix-vector product and add the
-result to a scalar-vector product, with a symmetric packed matrix.
-The operation is defined as:
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` symmetric matrix, supplied in packed form,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``spmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_spmv_buffer:
-
-spmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void spmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void spmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``\ +1))/2. See :ref:`matrix-storage` for
-      more details.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-   
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_spmv_usm:
-
-spmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event spmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event spmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-   
-.. container:: section
-      
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``\ +1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   beta
-      Scaling factor for vector ``y``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/spr.rst b/docs/domains/blas/spr.rst
deleted file mode 100644
index 2ef91546d..000000000
--- a/docs/domains/blas/spr.rst
+++ /dev/null
@@ -1,193 +0,0 @@
-.. _onemkl_blas_spr:
-
-spr
-===
-
-Performs a rank-1 update of a symmetric packed matrix.
-
-.. _onemkl_blas_spr_description:
-
-.. rubric:: Description
-
-The ``spr`` routines compute a scalar-vector-vector product and add the
-result to a symmetric packed matrix. The operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*x^T + A
-
-where:
-
-``alpha`` is scalar,
-
-``A`` is an ``n``-by-``n`` symmetric matrix, supplied in packed form,
-
-``x`` is a vector of length ``n``.
-
-``spr`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_spr_buffer:
-
-spr (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void spr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void spr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the vector-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n`` + 1))/2. See :ref:`matrix-storage` for
-      more details.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-      :class: sectiontitle
-
-   a
-      Buffer holding the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      
-
-.. _onemkl_blas_spr_usm:
-
-spr (USM Version)
------------------
-
-.. rubric:: Syntax
-         
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event spr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event spr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-   
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the vector-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n`` + 1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-      
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/spr2.rst b/docs/domains/blas/spr2.rst
deleted file mode 100644
index 6cd195cbb..000000000
--- a/docs/domains/blas/spr2.rst
+++ /dev/null
@@ -1,213 +0,0 @@
-.. _onemkl_blas_spr2:
-
-spr2
-====
-
-Computes a rank-2 update of a symmetric packed matrix.
-
-.. _onemkl_blas_spr2_description:
-
-.. rubric:: Description
-
-The ``spr2`` routines compute two scalar-vector-vector products and add
-them to a symmetric packed matrix. The operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*y^T + alpha*y*x^T + A
-
-where:
-
-``alpha`` is scalar,
-
-``A`` is an ``n``-by-``n`` symmetric matrix, supplied in packed form,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``spr2`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_spr2_buffer:
-
-spr2 (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void spr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void spr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n`` + 1))/2. See :ref:`matrix-storage` for
-      more details.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper`` or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-
-.. _onemkl_blas_spr2_usm:
-
-spr2 (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event spr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event spr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n`` + 1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper`` or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/swap.rst b/docs/domains/blas/swap.rst
deleted file mode 100644
index 79c2d4121..000000000
--- a/docs/domains/blas/swap.rst
+++ /dev/null
@@ -1,184 +0,0 @@
-.. _onemkl_blas_swap:
-
-swap
-====
-
-Swaps a vector with another vector.
-
-.. _onemkl_blas_swap_description:
-
-.. rubric:: Description
-
-Given two vectors of ``n`` elements, ``x`` and ``y``, the ``swap``
-routines return vectors ``y`` and ``x`` swapped, each replacing the
-other.
-
-.. math::
-
-   \left[\begin{array}{c}
-      y\\x
-   \end{array}\right]
-   \leftarrow
-   \left[\begin{array}{c}
-      x\\y
-   \end{array}\right]
-
-``swap`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_swap_buffer:
-
-swap (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void swap(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void swap(sycl::queue &queue,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding updated vector ``x``, that is, the input vector
-      ``y``.
-
-   y
-      Buffer holding updated vector ``y``, that is, the input vector
-      ``x``.
-
-      
-
-.. _onemkl_blas_swap_usm:
-
-swap (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event swap(sycl::queue &queue,
-                        std::int64_t n,
-                        T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event swap(sycl::queue &queue,
-                        std::int64_t n,
-                        T *x,
-                        std::int64_t incx,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-   
-.. container:: section
-   
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   n
-      Number of elements in vector ``x``.
-
-   x
-      Pointer to the input vector ``x``. The array must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to the input vector ``y``. The array must be of size at
-      least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated array ``x``, that is, the input vector
-      ``y``.
-
-   y
-      Pointer to the updated array ``y``, that is, the input vector
-      ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-1-routines`
diff --git a/docs/domains/blas/symm.rst b/docs/domains/blas/symm.rst
deleted file mode 100644
index 26e16e499..000000000
--- a/docs/domains/blas/symm.rst
+++ /dev/null
@@ -1,311 +0,0 @@
-.. _onemkl_blas_symm:
-
-symm
-====
-
-Computes a matrix-matrix product where one input matrix is symmetric
-and one matrix is general.
-
-.. _onemkl_blas_symm_description:
-
-.. rubric:: Description
-
-The ``symm`` routines compute a scalar-matrix-matrix product and add the
-result to a scalar-matrix product, where one of the matrices in the
-multiplication is symmetric. The argument ``left_right`` determines
-if the symmetric matrix, ``A``, is on the left of the multiplication
-(``left_right`` = ``side::left``) or on the right (``left_right`` =
-``side::right``). Depending on ``left_right``, the operation is
-defined as:
-
-.. math::
-
-      C \leftarrow alpha*A*B + beta*C
-
-or
-
-.. math::
-
-      C \leftarrow alpha*B*A + beta*C
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is a symmetric matrix, either ``m``-by-``m`` or ``n``-by-``n``,
-
-``B`` and ``C`` are ``m``-by-``n`` matrices.
-
-``symm`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_symm_buffer:
-
-symm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void symm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void symm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the multiplication
-      (``side::left``) or on the right side (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether ``A``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of ``B`` and ``C``. The value of ``m`` must be at
-      least zero.
-
-   n
-      Number of columns of ``B`` and ``C``. The value of ``n`` must be
-      at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``A`` is on the left of the multiplication,
-      or ``lda``\ \*\ ``n`` if ``A`` is on the right. See :ref:`matrix-storage`
-      for more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if ``A`` is on
-      the left of the multiplication, or at least ``n`` if ``A`` is on
-      the right. Must be positive.
-
-   b
-      Buffer holding input matrix ``B``. Must have size at least
-      ``ldb``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldb``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The buffer holding the input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by ``alpha``\ \*\ ``A``\ \*\ ``B`` +
-      ``beta``\ \*\ ``C`` (``left_right`` = ``side::left``) or
-      ``alpha``\ \*\ ``B``\ \*\ ``A`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::right``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized before
-   calling ``symm``.
-
-
-.. _onemkl_blas_symm_usm:
-
-symm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event symm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        const T* b,
-                        std::int64_t ldb,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event symm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        const T* b,
-                        std::int64_t ldb,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the
-      multiplication (``side::left``) or on the right side
-      (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether ``A``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of ``B`` and ``C``. The value of ``m`` must be
-      at least zero.
-
-   n
-      Number of columns of ``B`` and ``C``. The value of ``n`` must
-      be at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Pointer to input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``A`` is on the left of the
-      multiplication, or ``lda``\ \*\ ``n`` if ``A`` is on the right.
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if ``A`` is
-      on the left of the multiplication, or at least ``n`` if ``A``
-      is on the right. Must be positive.
-
-   b
-      Pointer to input matrix ``B``. Must have size at least
-      ``ldb``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldb``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-      
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      The pointer to input/output matrix ``C``. It must have a
-      size of at least ``ldc``\ \*\ ``n`` if column major layout is
-      used to store matrices or at least ``ldc``\ \*\ ``m`` if row
-      major layout is used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldc
-      The leading dimension of ``C``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*\ ``A``\ \*\ ``B`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::left``) or
-      ``alpha``\ \*\ ``B``\ \*\ ``A`` + ``beta``\ \*\ ``C``
-      (``left_right`` = ``side::right``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``beta`` = 0, matrix ``C`` does not need to be initialized
-   before calling ``symm``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/symv.rst b/docs/domains/blas/symv.rst
deleted file mode 100644
index d3750ec58..000000000
--- a/docs/domains/blas/symv.rst
+++ /dev/null
@@ -1,226 +0,0 @@
-.. _onemkl_blas_symv:
-
-symv
-====
-
-Computes a matrix-vector product for a symmetric matrix.
-
-.. _onemkl_blas_symv_description:
-
-.. rubric:: Description
-
-The ``symv`` routines compute a scalar-matrix-vector product and
-add the result to a scalar-vector product, with a symmetric matrix.
-The operation is defined as:
-
-.. math::
-
-      y \leftarrow alpha*A*x + beta*y
-
-where:
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` is an ``n``-by-``n`` symmetric matrix,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``symv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_symv_buffer:
-
-symv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void symv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void symv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 T beta,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input/output vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Buffer holding the updated vector ``y``.
-
-
-.. _onemkl_blas_symv_usm:
-
-symv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event symv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event symv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *a,
-                        std::int64_t lda,
-                        const T *x,
-                        std::int64_t incx,
-                        T beta,
-                        T *y,
-                        std::int64_t incy,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input/output vector ``y``. The array holding
-      input/output vector ``y`` must be of size at least (1 + (``n``
-      - 1)*abs(``incy``)). See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   y
-      Pointer to the updated vector ``y``.
-
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/syr.rst b/docs/domains/blas/syr.rst
deleted file mode 100644
index 74f692a70..000000000
--- a/docs/domains/blas/syr.rst
+++ /dev/null
@@ -1,202 +0,0 @@
-.. _onemkl_blas_syr:
-
-syr
-===
-
-Computes a rank-1 update of a symmetric matrix.
-
-.. _onemkl_blas_syr_description:
-
-.. rubric:: Description
-
-The ``syr`` routines compute a scalar-vector-vector product and add
-the result to a symmetric matrix. The operation is
-defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*x^T + A
-
-where:
-
-``alpha`` is scalar,
-
-``A`` is an ``n``-by-``n`` symmetric matrix,
-
-``x`` is a vector of length ``n``.
-
-``syr`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_syr_buffer:
-
-syr (Buffer Version)
---------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void syr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void syr(sycl::queue &queue,
-                onemkl::uplo upper_lower,
-                std::int64_t n,
-                T alpha,
-                sycl::buffer<T,1> &x,
-                std::int64_t incx,
-                sycl::buffer<T,1> &a,
-                std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper`` or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-
-.. _onemkl_blas_syr_usm:
-
-syr (USM Version)
------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syr(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       std::int64_t n,
-                       T alpha,
-                       const T *x,
-                       std::int64_t incx,
-                       T *a,
-                       std::int64_t lda,
-                       const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-vector product.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper`` or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/syr2.rst b/docs/domains/blas/syr2.rst
deleted file mode 100644
index b7628e3f1..000000000
--- a/docs/domains/blas/syr2.rst
+++ /dev/null
@@ -1,228 +0,0 @@
-.. _onemkl_blas_syr2:
-
-syr2
-====
-
-Computes a rank-2 update of a symmetric matrix.
-
-.. _onemkl_blas_syr2_description:
-
-.. rubric:: Description
-
-The ``syr2`` routines compute two scalar-vector-vector products, add them
-together, and add the result to a symmetric matrix. The
-operation is defined as:
-
-.. math::
-
-      A \leftarrow alpha*x*y^T + alpha*y*x^T + A
-      
-where:
-
-``alpha`` is a scalar,
-
-``A`` is an ``n``-by-``n`` symmetric matrix,
-
-``x`` and ``y`` are vectors of length ``n``.
-
-``syr2`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_blas_syr2_buffer:
-
-syr2 (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void syr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void syr2(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx,
-                 sycl::buffer<T,1> &y,
-                 std::int64_t incy,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-2 update.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Buffer holding input vector ``y``. The buffer must be of
-      size at least (1 + (``n`` - 1)*abs(``incy``)). See :ref:`matrix-storage`
-      for more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Buffer holding the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-      
-
-.. _onemkl_blas_syr2_usm:
-
-syr2 (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syr2(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        std::int64_t n,
-                        T alpha,
-                        const T *x,
-                        std::int64_t incx,
-                        const T *y,
-                        std::int64_t incy,
-                        T *a,
-                        std::int64_t lda,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of columns of ``A``. Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-2 update.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   y
-      Pointer to input vector ``y``. The array holding input vector
-      ``y`` must be of size at least (1 + (``n`` - 1)*abs(``incy``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incy
-      Stride of vector ``y``.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   a
-      Pointer to the updated upper triangular part of the symmetric
-      matrix ``A`` if ``upper_lower``\ \=\ ``upper``, or the updated lower
-      triangular part of the symmetric matrix ``A`` if
-      ``upper_lower``\ \=\ ``lower``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/syr2k.rst b/docs/domains/blas/syr2k.rst
deleted file mode 100644
index 8605779c6..000000000
--- a/docs/domains/blas/syr2k.rst
+++ /dev/null
@@ -1,397 +0,0 @@
-.. _onemkl_blas_syr2k:
-
-syr2k
-=====
-
-Performs a symmetric rank-2k update.
-
-.. _onemkl_blas_syr2k_description:
-
-.. rubric:: Description
-
-The ``syr2k`` routines perform a rank-2k update of an ``n`` x ``n``
-symmetric matrix ``C`` by general matrices ``A`` and ``B``. 
-
-If ``trans`` = ``transpose::nontrans``, the operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*(A*B^T + B*A^T) + beta*C
-
-where ``A`` and ``B`` are ``n`` x ``k`` matrices.
-
-If ``trans`` = ``transpose::trans``, the operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*(A^T*B + B^T*A) + beta * C
-
-
-where ``A`` and ``B`` are ``k`` x ``n`` matrices.
-
-
-In both cases:
-
-``alpha`` and ``beta`` are scalars,
-
-``C`` is a symmetric matrix and ``A`` and ``B`` are general matrices,
-
-The inner dimension of both matrix multiplications is ``k``.
-
-``syr2k`` supports the following precisions:
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_syr2k_buffer:
-
-syr2k (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void syr2k(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose trans,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void syr2k(sycl::queue &queue,
-                  onemkl::uplo upper_lower,
-                  onemkl::transpose trans,
-                  std::int64_t n,
-                  std::int64_t k,
-                  T alpha,
-                  sycl::buffer<T,1> &a,
-                  std::int64_t lda,
-                  sycl::buffer<T,1> &b,
-                  std::int64_t ldb,
-                  T beta,
-                  sycl::buffer<T,1> &c,
-                  std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies the operation to apply, as described above. Conjugation
-      is never performed, even if ``trans`` = ``transpose::conjtrans``.
-
-   n
-      Number of rows and columns in ``C``. The value of ``n`` must be at
-      least zero.
-
-   k
-      Inner dimension of matrix multiplications. The value of ``k`` must
-      be at least zero.
-
-   alpha
-      Scaling factor for the rank-2k update.
-
-   a
-      Buffer holding input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   b
-      Buffer holding input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-         * - Row major
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-
-      See :ref:`matrix-storage`
-      for more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-         * - Row major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Buffer holding input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by the updated ``C`` matrix.
-
-      
-
-.. _onemkl_blas_syr2k_usm:
-
-syr2k (USM Version)
--------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syr2k(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose trans,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syr2k(sycl::queue &queue,
-                         onemkl::uplo upper_lower,
-                         onemkl::transpose trans,
-                         std::int64_t n,
-                         std::int64_t k,
-                         T alpha,
-                         const T* a,
-                         std::int64_t lda,
-                         const T* b,
-                         std::int64_t ldb,
-                         T beta,
-                         T* c,
-                         std::int64_t ldc,
-                         const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies the operation to apply, as described above.
-      Conjugation is never performed, even if ``trans`` =
-      ``transpose::conjtrans``.
-
-   n
-      Number of rows and columns in ``C``. The value of ``n`` must be
-      at least zero.
-
-   k
-      Inner dimension of matrix multiplications. The value of ``k``
-      must be at least zero.
-
-   alpha
-      Scaling factor for the rank-2k update.
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   b
-      Pointer to input matrix ``B``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-         * - Row major
-           - ``B`` is an ``n``-by-``k`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``n``.
-           - ``B`` is an ``k``-by-``n`` matrix so the array ``b``
-             must have size at least ``ldb``\ \*\ ``k``.
-   
-      See :ref:`matrix-storage` for
-      more details.
-
-   ldb
-      The leading dimension of ``B``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``ldb`` must be at least ``n``.
-           - ``ldb`` must be at least ``k``.
-         * - Row major
-           - ``ldb`` must be at least ``k``.
-           - ``ldb`` must be at least ``n``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Pointer to input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by the updated ``C``
-      matrix.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/syrk.rst b/docs/domains/blas/syrk.rst
deleted file mode 100644
index 74cf63af0..000000000
--- a/docs/domains/blas/syrk.rst
+++ /dev/null
@@ -1,296 +0,0 @@
-.. _onemkl_blas_syrk:
-
-syrk
-====
-
-Performs a symmetric rank-k update.
-
-.. _onemkl_blas_syrk_description:
-
-.. rubric:: Description
-
-The ``syrk`` routines perform a rank-k update of a symmetric matrix ``C``
-by a general matrix ``A``. The operation is defined as:
-
-.. math::
-
-      C \leftarrow alpha*op(A)*op(A)^T + beta*C
-
-where:
-
-op(``X``) is one of op(``X``) = ``X`` or
-op(``X``) = ``X``\ :sup:`T`,
-
-``alpha`` and ``beta`` are scalars,
-
-``C`` is a symmetric matrix and ``A`` is a general matrix.
-
-Here op(``A``) is ``n``-by-``k``, and ``C`` is ``n``-by-``n``.
-
-``syrk`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_syrk_buffer:
-
-syrk (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void syrk(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void syrk(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 std::int64_t n,
-                 std::int64_t k,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 T beta,
-                 sycl::buffer<T,1> &c,
-                 std::int64_t ldc)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A`` (See :ref:`onemkl_datatypes` for more details). Conjugation is never performed, even if ``trans`` = ``transpose::conjtrans``.
-
-   n
-      Number of rows and columns in ``C``. The value of ``n`` must be at
-      least zero.
-
-   k
-      Number of columns in op(``A``). The value of ``k`` must be at least
-      zero.
-
-   alpha
-      Scaling factor for the rank-k update.
-
-   a
-      Buffer holding input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-
-      See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-      
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Buffer holding input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by
-      ``alpha``\ \*op(``A``)*op(``A``)\ :sup:`T` + ``beta``\ \*\ ``C``.
-
-
-.. _onemkl_blas_syrk_usm:
-
-syrk (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syrk(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syrk(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        std::int64_t n,
-                        std::int64_t k,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T beta,
-                        T* c,
-                        std::int64_t ldc,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``C``'s data is stored in its upper or lower
-      triangle. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A`` (See :ref:`onemkl_datatypes` for more details). Conjugation is never performed, even if
-      ``trans`` = ``transpose::conjtrans``.
-
-   n
-      Number of rows and columns in ``C``. The value of ``n`` must be
-      at least zero.
-
-   k
-      Number of columns in op(``A``). The value of ``k`` must be at
-      least zero.
-
-   alpha
-      Scaling factor for the rank-k update.
-
-   a
-      Pointer to input matrix ``A``.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``
-         * - Row major
-           - ``A`` is an ``n``-by-``k`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``n``.
-           - ``A`` is an ``k``-by-``n`` matrix so the array ``a``
-             must have size at least ``lda``\ \*\ ``k``.
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      The leading dimension of ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``trans`` = ``transpose::nontrans``
-           - ``trans`` = ``transpose::trans`` or ``transpose::conjtrans``
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   beta
-      Scaling factor for matrix ``C``.
-
-   c
-      Pointer to input/output matrix ``C``. Must have size at least
-      ``ldc``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   ldc
-      Leading dimension of ``C``. Must be positive and at least
-      ``n``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*op(``A``)*op(``A``)\ :sup:`T` +
-      ``beta``\ \*\ ``C``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/syrk_batch.rst b/docs/domains/blas/syrk_batch.rst
deleted file mode 100644
index b9782041e..000000000
--- a/docs/domains/blas/syrk_batch.rst
+++ /dev/null
@@ -1,484 +0,0 @@
-.. _onemkl_blas_syrk_batch:
-
-syrk_batch
-==========
-
-Computes a group of ``syrk`` operations.
-
-.. _onemkl_blas_syrk_batch_description:
-
-.. rubric:: Description
-
-The ``syrk_batch`` routines are batched versions of :ref:`onemkl_blas_syrk`, performing
-multiple ``syrk`` operations in a single call. Each ``syrk``
-operation performs a rank-k update of a symmetric matrix ``C`` by a general matrix ``A``.
-   
-``syrk_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_syrk_batch_buffer:
-
-syrk_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``syrk_batch`` supports only the strided API. 
-
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A and C are matrices at offset i * stridea, i * stridec in a and c.
-       C := alpha * op(A) * op(A)^T + beta * C
-   end for
-
-where:
-
-op(X) is one of op(X) = X, or op(X) = X\ :sup:`T`, or op(X) = X\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` and ``C`` are matrices,
-
-op(``A``) is ``n`` x ``k`` and ``C`` is ``n`` x ``n``.
-
-The ``a`` and ``c`` buffers contain all the input matrices. The stride 
-between matrices is given by the stride parameter. The total number
-of matrices in ``a`` and ``c`` buffers is given by the ``batch_size`` parameter.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void syrk_batch(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       onemkl::transpose trans,
-                       std::int64_t n,
-                       std::int64_t k,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       T beta,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void syrk_batch(sycl::queue &queue,
-                       onemkl::uplo upper_lower,
-                       onemkl::transpose trans,
-                       std::int64_t n,
-                       std::int64_t k,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       T beta,
-                       sycl::buffer<T,1> &c,
-                       std::int64_t ldc,
-                       std::int64_t stridec,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether data in ``C`` is stored in its upper or lower triangle.
-      For more details, see :ref:`onemkl_datatypes`.
-
-   trans
-      Specifies op(``A``) the transposition operation applied to the
-      matrix ``A``. Conjugation is never performed, even if trans =
-      transpose::conjtrans. See :ref:`onemkl_datatypes` for more
-      details.
-
-   n
-      Number of rows and columns of ``C``.
-      Must be at least zero.
-
-   k
-      Number of columns of op(``A``).
-      Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-k update.
-
-   a
-      Buffer holding the input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   beta
-      Scaling factor for the matrices ``C``.
-
-   c
-      Buffer holding input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive
-      and at least ``n``.
-
-   stridec
-      Stride between different ``C`` matrices. Must be at least
-      ``ldc`` * ``n``.
-
-   batch_size
-      Specifies the number of rank-k update operations to perform.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output buffer, overwritten by ``batch_size`` rank-k update
-      operations of the form ``alpha`` * op(``A``)*op(``A``)^T + ``beta`` * ``C``.
-
-
-.. _onemkl_blas_syrk_batch_usm:
-
-syrk_batch (USM Version)
----------------------------
-
-.. rubric:: Description
-
-The USM version of ``syrk_batch`` supports the group API and strided API. 
-
-The group API operation is defined as:
-::
-
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           A, B, and C are matrices in a[idx] and c[idx]
-           C := alpha[i] * op(A) * op(A)^T + beta[i] * C
-           idx = idx + 1
-       end for
-   end for
-
-The strided API operation is defined as
-::
-
-   for i = 0 … batch_size – 1
-       A, B and C are matrices at offset i * stridea, i * stridec in a and c.
-       C := alpha * op(A) * op(A)^T + beta * C
-   end for
-
-where:
-
-op(X) is one of op(X) = X, or op(X) = X\ :sup:`T`, or op(X) = X\ :sup:`H`,
-
-``alpha`` and ``beta`` are scalars,
-
-``A`` and ``C`` are matrices,
-
-op(``A``) is ``n`` x ``k`` and ``C`` is ``n`` x ``n``.
-
- 
-For group API, ``a`` and ``c`` arrays contain the pointers for all the input matrices. 
-The total number of matrices in ``a`` and ``c`` are given by: 
-
-.. math::
-
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]    
- 
-For strided API, ``a`` and ``c`` arrays contain all the input matrices. The total number of matrices 
-in ``a`` and ``c`` are given by the ``batch_size`` parameter.  
-   
-**Group API**
-
-.. rubric:: Syntax
-   
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syrk_batch(sycl::queue &queue,
-                              uplo *upper_lower,
-                              transpose *trans,
-                              std::int64_t *n,
-                              std::int64_t *k,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              T *beta,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syrk_batch(sycl::queue &queue,
-                              uplo *upper_lower,
-                              transpose *trans,
-                              std::int64_t *n,
-                              std::int64_t *k,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              T *beta,
-                              T **c,
-                              std::int64_t *ldc,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Array of ``group_count`` ``onemkl::upper_lower``
-      values. ``upper_lower[i]`` specifies whether data in C for every
-      matrix in group ``i`` is in upper or lower triangle.
-
-   trans
-      Array of ``group_count`` ``onemkl::transpose`` values. ``trans[i]`` specifies the form of op(``A``) used in
-      the rank-k update in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the
-      number of rows and columns of ``C`` for every matrix in group ``i``. All entries must be at least zero.
-
-   k
-      Array of ``group_count`` integers. ``k[i]`` specifies the
-      number of columns of op(``A``) for every matrix in group ``i``. All entries must be at
-      least zero.
-
-   alpha
-      Array of ``group_count`` scalar elements. ``alpha[i]`` specifies the scaling factor for every rank-k update in group ``i``.
-
-   a
-      Array of pointers to input matrices ``A`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   lda
-      Array of ``group_count`` integers. ``lda[i]`` specifies the
-      leading dimension of ``A`` for every matrix in group ``i``. All
-      entries must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda[i]`` must be at least ``n[i]``.
-           - ``lda[i]`` must be at least ``k[i]``.
-         * - Row major
-           - ``lda[i]`` must be at least ``k[i]``.
-           - ``lda[i]`` must be at least ``n[i]``.
-             
-   beta
-      Array of ``group_count`` scalar elements. ``beta[i]`` specifies the scaling factor for matrix ``C`` 
-      for every matrix in group ``i``.
-
-   c
-      Array of pointers to input/output matrices ``C`` with size ``total_batch_count``. 
-      
-      See :ref:`matrix-storage` for more details.
-
-   ldc
-      Array of ``group_count`` integers. ``ldc[i]`` specifies the
-      leading dimension of ``C`` for every matrix in group ``i``.  All
-      entries must be positive and ``ldc[i]`` must be at least ``n[i]``.
-
-   group_count
-      Specifies the number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the
-      number of rank-k update products in group ``i``. All entries must be at least 0.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Overwritten by the ``n[i]``-by-``n[i]`` matrix calculated by 
-      (``alpha[i]`` * op(``A``)*op(``A``)^T + ``beta[i]`` * ``C``) for group ``i``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event syrk_batch(sycl::queue &queue,
-                              uplo upper_lower,
-                              transpose trans,
-                              std::int64_t n,
-                              std::int64_t k,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stride_a,
-                              T beta,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stride_c,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event syrk_batch(sycl::queue &queue,
-                              uplo upper_lower,
-                              transpose trans,
-                              std::int64_t n,
-                              std::int64_t k,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stride_a,
-                              T beta,
-                              T *c,
-                              std::int64_t ldc,
-                              std::int64_t stride_c,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether data in ``C`` is stored in its upper or lower triangle.
-      For more details, see :ref:`onemkl_datatypes`.
-
-   trans
-      Specifies op(``A``) the transposition operation applied to the
-      matrices ``A``. Conjugation is never performed, even if trans =
-      transpose::conjtrans. See :ref:`onemkl_datatypes` for more
-      details.
-
-   n
-      Number of rows and columns of ``C``.
-      Must be at least zero.
-
-   k
-      Number of columns of op(``A``).
-      Must be at least zero.
-
-   alpha
-      Scaling factor for the rank-k updates.
-
-   a
-      Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      The leading dimension of the matrices ``A``. It must be positive.
-
-      .. list-table::
-         :header-rows: 1
-
-         * -
-           - ``A`` not transposed
-           - ``A`` transposed
-         * - Column major
-           - ``lda`` must be at least ``n``.
-           - ``lda`` must be at least ``k``.
-         * - Row major
-           - ``lda`` must be at least ``k``.
-           - ``lda`` must be at least ``n``.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   beta
-      Scaling factor for the matrices ``C``.
-
-   c
-      Pointer to input/output matrices ``C`` with size ``stridec`` * ``batch_size``.
-
-   ldc
-      The leading dimension of the matrices ``C``. It must be positive
-      and at least ``n``.
-
-   stridec
-      Stride between different ``C`` matrices.
-
-   batch_size
-      Specifies the number of rank-k update operations to perform.
-
-   dependencies
-         List of events to wait for before starting computation, if any.
-         If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   c
-      Output matrices, overwritten by ``batch_size`` rank-k update
-      operations of the form ``alpha`` * op(``A``)*op(``A``)^T + ``beta`` * ``C``.
-
-.. container:: section
-      
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/tbmv.rst b/docs/domains/blas/tbmv.rst
deleted file mode 100644
index 716013cb3..000000000
--- a/docs/domains/blas/tbmv.rst
+++ /dev/null
@@ -1,223 +0,0 @@
-.. _onemkl_blas_tbmv:
-
-tbmv
-====
-
-Computes a matrix-vector product using a triangular band matrix.
-
-.. _onemkl_blas_tbmv_description:
-
-.. rubric:: Description
-
-The ``tbmv`` routines compute a matrix-vector product with a triangular
-band matrix. The operation is defined as:
-
-.. math::
-
-      x \leftarrow op(A)*x
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular band matrix, with (``k`` + 1) diagonals,
-
-``x`` is a vector of length ``n``.
-
-``tbmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_tbmv_buffer:
-
-tbmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void tbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void tbmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of sub/super-diagonals of the matrix ``A``. Must be at
-      least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` + 1),
-      and positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-      
-
-.. _onemkl_blas_tbmv_usm:
-
-tbmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event tbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event tbmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of sub/super-diagonals of the matrix ``A``. Must be at
-      least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` +
-      1), and positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/tbsv.rst b/docs/domains/blas/tbsv.rst
deleted file mode 100644
index bce876115..000000000
--- a/docs/domains/blas/tbsv.rst
+++ /dev/null
@@ -1,225 +0,0 @@
-.. _onemkl_blas_tbsv:
-
-tbsv
-====
-
-Solves a system of linear equations whose coefficients are in a
-triangular band matrix.
-
-.. _onemkl_blas_tbsv_description:
-
-.. rubric:: Description
-
-The ``tbsv`` routines solve a system of linear equations whose
-coefficients are in a triangular band matrix. The operation is
-defined as:
-
-.. math::
-
-      op(A)*x = b
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular band matrix, with (``k`` + 1) diagonals,
-
-``b`` and ``x`` are vectors of length ``n``.
-
-``tbsv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_tbsv_buffer:
-
-tbsv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void tbsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void tbsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of sub/super-diagonals of the matrix ``A``. Must be at
-      least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` + 1),
-      and positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the solution vector ``x``.
-
-      
-
-.. _onemkl_blas_tbsv_usm:
-
-tbsv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event tbsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event tbsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   k
-      Number of sub/super-diagonals of the matrix ``A``. Must be at
-      least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least (``k`` +
-      1), and positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the solution vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/tpmv.rst b/docs/domains/blas/tpmv.rst
deleted file mode 100644
index 736fbcf21..000000000
--- a/docs/domains/blas/tpmv.rst
+++ /dev/null
@@ -1,199 +0,0 @@
-.. _onemkl_blas_tpmv:
-
-tpmv
-====
-
-Computes a matrix-vector product using a triangular packed matrix.
-
-.. _onemkl_blas_tpmv_description:
-
-.. rubric:: Description
-
-The ``tpmv`` routines compute a matrix-vector product with a triangular
-packed matrix. The operation is defined as:
-
-.. math::
-
-      x \leftarrow op(A)*x
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular band matrix, supplied in packed form,
-
-``x`` is a vector of length ``n``.
-
-``tpmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_tpmv_buffer:
-
-tpmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void tpmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void tpmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``\ +1))/2. See :ref:`matrix-storage` for
-      more details.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-
-.. _onemkl_blas_tpmv_usm:
-
-tpmv (USM Version)
-------------------
-      
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event tpmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        const T *a,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event tpmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        const T *a,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``\ +1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/tpsv.rst b/docs/domains/blas/tpsv.rst
deleted file mode 100644
index 14082a077..000000000
--- a/docs/domains/blas/tpsv.rst
+++ /dev/null
@@ -1,207 +0,0 @@
-.. _onemkl_blas_tpsv:
-
-tpsv
-====
-
-Solves a system of linear equations whose coefficients are in a
-triangular packed matrix.
-
-.. _onemkl_blas_tpsv_description:
-
-.. rubric:: Description
-
-The ``tpsv`` routines solve a system of linear equations whose
-coefficients are in a triangular packed matrix. The operation is
-defined as:
-
-.. math::
-
-      op(A)*x = b
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular band matrix, supplied in packed form,
-
-``b`` and ``x`` are vectors of length ``n``.
-
-``tpsv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_tpsv_buffer:
-
-tpsv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-      
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void tpsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void tpsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      (``n``\ \*(``n``\ +1))/2. See :ref:`matrix-storage` for
-      more details.
-
-   x
-      Buffer holding the ``n``-element right-hand side vector ``b``. The
-      buffer must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the solution vector ``x``.
-
-
-.. _onemkl_blas_tpsv_usm:
-
-tpsv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event tpsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event tpsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Numbers of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least (``n``\ \*(``n``\ +1))/2. See
-      :ref:`matrix-storage` for
-      more details.
-
-   x
-      Pointer to the ``n``-element right-hand side vector ``b``. The
-      array holding the ``n``-element right-hand side vector ``b``
-      must be of size at least (1 + (``n`` - 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the solution vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/trmm.rst b/docs/domains/blas/trmm.rst
deleted file mode 100644
index 1a812d56c..000000000
--- a/docs/domains/blas/trmm.rst
+++ /dev/null
@@ -1,288 +0,0 @@
-.. _onemkl_blas_trmm:
-
-trmm
-====
-
-Computes a matrix-matrix product where one input matrix is triangular
-and one input matrix is general.
-
-.. _onemkl_blas_trmm_description:
-
-.. rubric:: Description
-
-The ``trmm`` routines compute a scalar-matrix-matrix product where one of
-the matrices in the multiplication is triangular. The argument
-``left_right`` determines if the triangular matrix, ``A``, is on the
-left of the multiplication (``left_right`` = ``side::left``) or on
-the right (``left_right`` = ``side::right``). Depending on
-``left_right``, the operation is defined as:
-
-.. math::
-
-      B \leftarrow alpha*op(A)*B
-
-or
-
-.. math::
-
-      B \leftarrow alpha*B*op(A)
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) = ``A``\ :sup:`T`,
-or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` is a scalar,
-
-``A`` is a triangular matrix, and ``B`` is a general matrix.
-
-Here ``B`` is ``m`` x ``n`` and ``A`` is either ``m`` x ``m`` or
-``n`` x ``n``, depending on ``left_right``.
-
-``trmm`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_trmm_buffer:
-
-trmm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void trmm(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose transa,
-                 onemkl::diag unit_diag,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void trmm(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose transa,
-                 onemkl::diag unit_diag,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the multiplication
-      (``side::left``) or on the right side (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrix ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether ``A`` is assumed to be unit triangular (all
-      diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of ``B``. The value of ``m`` must be
-      at least zero.
-
-   n
-      Specifies the number of columns of ``B``. The value of ``n`` must
-      be at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``left_right`` = ``side::left``, or
-      ``lda``\ \*\ ``n`` if ``left_right`` = ``side::right``. See
-      :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if
-      ``left_right`` = ``side::right``. Must be positive.
-
-   b
-      Buffer holding input/output matrix ``B``. Must have size at
-      least ``ldb``\ \*\ ``n`` if column major layout is used to store
-      matrices or at least ``ldb``\ \*\ ``m`` if row major layout is
-      used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Output buffer, overwritten by ``alpha``\ \*op(``A``)\*\ ``B`` or
-      ``alpha``\ \*\ ``B``\ \*op(``A``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero, and ``A`` and ``B`` do
-   not need to be initialized at entry.
-
-      
-
-.. _onemkl_blas_trmm_usm:
-
-trmm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trmm(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose transa,
-                        onemkl::diag unit_diag,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T* b,
-                        std::int64_t ldb,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trmm(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose transa,
-                        onemkl::diag unit_diag,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T* b,
-                        std::int64_t ldb,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-   
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` is on the left side of the
-      multiplication (``side::left``) or on the right side
-      (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrix ``A`` is upper or lower
-      triangular. See :ref:`onemkl_datatypes` for more details.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether ``A`` is assumed to be unit triangular (all
-      diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of ``B``. The value of ``m`` must
-      be at least zero.
-
-   n
-      Specifies the number of columns of ``B``. The value of ``n``
-      must be at least zero.
-
-   alpha
-      Scaling factor for the matrix-matrix product.
-
-   a
-      Pointer to input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``left_right`` = ``side::left``, or
-      ``lda``\ \*\ ``n`` if ``left_right`` = ``side::right``. See
-      :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if
-      ``left_right`` = ``side::right``. Must be positive.
-
-   b
-      Pointer to input/output matrix ``B``. Must have size at
-      least ``ldb``\ \*\ ``n`` if column major layout is used to store
-      matrices or at least ``ldb``\ \*\ ``m`` if row major layout is
-      used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Pointer to the output matrix, overwritten by
-      ``alpha``\ \*op(``A``)\*\ ``B`` or
-      ``alpha``\ \*\ ``B``\ \*op(``A``).
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero, and ``A`` and ``B``
-   do not need to be initialized at entry.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/trmv.rst b/docs/domains/blas/trmv.rst
deleted file mode 100644
index d779c12a9..000000000
--- a/docs/domains/blas/trmv.rst
+++ /dev/null
@@ -1,210 +0,0 @@
-.. _onemkl_blas_trmv:
-
-trmv
-====
-
-Computes a matrix-vector product using a triangular matrix.
-
-.. _onemkl_blas_trmv_description:
-
-.. rubric:: Description
-
-The ``trmv`` routines compute a matrix-vector product with a triangular
-matrix. The operation is defined as:
-
-.. math::
-
-      x \leftarrow op(A)*x
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular matrix,
-
-``x`` is a vector of length ``n``.
-
-``trmv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_trmv_buffer:
-
-trmv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void trmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void trmv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Buffer holding input vector ``x``. The buffer must be of size at
-      least (1 + (``n`` - 1)*abs(``incx``)). See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the updated vector ``x``.
-
-
-.. _onemkl_blas_trmv_usm:
-
-trmv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trmv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Pointer to input vector ``x``. The array holding input vector
-      ``x`` must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for
-      more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the updated vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/blas/trsm.rst b/docs/domains/blas/trsm.rst
deleted file mode 100644
index 0185e69d7..000000000
--- a/docs/domains/blas/trsm.rst
+++ /dev/null
@@ -1,286 +0,0 @@
-.. _onemkl_blas_trsm:
-
-trsm
-====
-
-Solves a triangular matrix equation (forward or backward solve).
-
-.. _onemkl_blas_trsm_description:
-
-.. rubric:: Description
-
-The ``trsm`` routines solve one of the following matrix equations:
-
-.. math::
-
-      op(A)*X = alpha*B
-
-or
-
-.. math::
-
-      X*op(A) = alpha*B
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` is a scalar,
-
-``A`` is a triangular matrix, and
-
-``B`` and ``X`` are ``m`` x ``n`` general matrices.
-
-``A`` is either ``m`` x ``m`` or ``n`` x ``n``, depending on whether
-it multiplies ``X`` on the left or right. On return, the matrix ``B``
-is overwritten by the solution matrix ``X``.
-
-``trsm`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_trsm_buffer:
-
-trsm (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void trsm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose transa,
-                 onemkl::diag unit_diag,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void trsm(sycl::queue &queue,
-                 onemkl::side left_right,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose transa,
-                 onemkl::diag unit_diag,
-                 std::int64_t m,
-                 std::int64_t n,
-                 T alpha,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &b,
-                 std::int64_t ldb)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` multiplies ``X`` on the left
-      (``side::left``) or on the right (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrix ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether ``A`` is assumed to be unit triangular (all
-      diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of ``B``. The value of ``m`` must be
-      at least zero.
-
-   n
-      Specifies the number of columns of ``B``. The value of ``n`` must
-      be at least zero.
-
-   alpha
-      Scaling factor for the solution.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``left_right`` = ``side::left``, or
-      ``lda``\ \*\ ``n`` if ``left_right`` = ``side::right``. See
-      :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if
-      ``left_right`` = ``side::right``. Must be positive.
-
-   b
-      Buffer holding input/output matrix ``B``. Must have size at
-      least ``ldb``\ \*\ ``n`` if column major layout is used to store
-      matrices or at least ``ldb``\ \*\ ``m`` if row major layout is
-      used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Output buffer. Overwritten by the solution matrix ``X``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero, and ``A`` and ``B`` do
-   not need to be initialized at entry.
-
-      
-
-.. _onemkl_blas_trsm_usm:
-
-trsm (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trsm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose transa,
-                        onemkl::diag unit_diag,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T* b,
-                        std::int64_t ldb,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trsm(sycl::queue &queue,
-                        onemkl::side left_right,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose transa,
-                        onemkl::diag unit_diag,
-                        std::int64_t m,
-                        std::int64_t n,
-                        T alpha,
-                        const T* a,
-                        std::int64_t lda,
-                        T* b,
-                        std::int64_t ldb,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether ``A`` multiplies ``X`` on the left
-      (``side::left``) or on the right (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrix ``A`` is upper or lower
-      triangular. See :ref:`onemkl_datatypes` for more details.
-
-   transa
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether ``A`` is assumed to be unit triangular (all
-      diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Specifies the number of rows of ``B``. The value of ``m`` must
-      be at least zero.
-
-   n
-      Specifies the number of columns of ``B``. The value of ``n``
-      must be at least zero.
-
-   alpha
-      Scaling factor for the solution.
-
-   a
-      Pointer to input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``m`` if ``left_right`` = ``side::left``, or
-      ``lda``\ \*\ ``n`` if ``left_right`` = ``side::right``. See
-      :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if
-      ``left_right`` = ``side::right``. Must be positive.
-
-   b
-      Pointer to input/output matrix ``B``. Must have size at
-      least ``ldb``\ \*\ ``n`` if column major layout is used to store
-      matrices or at least ``ldb``\ \*\ ``m`` if row major layout is
-      used to store matrices. See :ref:`matrix-storage` for more details.
-
-   ldb
-      Leading dimension of ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Pointer to the output matrix. Overwritten by the solution
-      matrix ``X``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero, and ``A`` and ``B``
-   do not need to be initialized at entry.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-3-routines`
diff --git a/docs/domains/blas/trsm_batch.rst b/docs/domains/blas/trsm_batch.rst
deleted file mode 100644
index e68b68aa2..000000000
--- a/docs/domains/blas/trsm_batch.rst
+++ /dev/null
@@ -1,497 +0,0 @@
-.. _onemkl_blas_trsm_batch:
-
-trsm_batch
-==========
-
-Computes a group of ``trsm`` operations.
-
-.. _onemkl_blas_trsm_batch_description:
-
-.. rubric:: Description
-
-The ``trsm_batch`` routines are batched versions of :ref:`onemkl_blas_trsm`, performing
-multiple ``trsm`` operations in a single call. Each ``trsm`` 
-solves an equation of the form op(A) \* X = alpha \* B or X \* op(A) = alpha \* B. 
-   
-``trsm_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_trsm_batch_buffer:
-
-trsm_batch (Buffer Version)
----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``trsm_batch`` supports only the strided API. 
-   
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A and B are matrices at offset i * stridea and i * strideb in a and b.
-       if (left_right == onemkl::side::left) then
-           compute X such that op(A) * X = alpha * B
-       else
-           compute X such that X * op(A) = alpha * B
-       end if
-       B := X
-   end for
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(A) = ``A``\ :sup:`T`,
-or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` is a scalar,
-
-``A`` is a triangular matrix,
-
-``B`` and ``X`` are ``m`` x ``n`` general matrices,
-
-``A`` is either ``m`` x ``m`` or ``n`` x ``n``, depending on whether
-it multiplies ``X`` on the left or right. On return, the matrix ``B``
-is overwritten by the solution matrix ``X``.
-
-The ``a`` and ``b`` buffers contain all the input matrices. The stride 
-between matrices is given by the stride parameter. The total number
-of matrices in the ``a`` and ``b`` buffers is given by the ``batch_size`` parameter.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void trsm_batch(sycl::queue &queue,
-                       onemkl::side left_right,
-                       onemkl::uplo upper_lower,
-                       onemkl::transpose trans,
-                       onemkl::diag unit_diag,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &b,
-                       std::int64_t ldb,
-                       std::int64_t strideb,
-                       std::int64_t batch_size)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void trsm_batch(sycl::queue &queue,
-                       onemkl::side left_right,
-                       onemkl::uplo upper_lower,
-                       onemkl::transpose trans,
-                       onemkl::diag unit_diag,
-                       std::int64_t m,
-                       std::int64_t n,
-                       T alpha,
-                       sycl::buffer<T,1> &a,
-                       std::int64_t lda,
-                       std::int64_t stridea,
-                       sycl::buffer<T,1> &b,
-                       std::int64_t ldb,
-                       std::int64_t strideb,
-                       std::int64_t batch_size)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether the matrices ``A`` multiply ``X`` on the left
-      (``side::left``) or on the right (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrices ``A`` are upper or lower
-      triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether the matrices ``A`` are assumed to be unit
-      triangular (all diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of the ``B`` matrices. Must be at least zero.
-
-   n
-      Number of columns of the ``B`` matrices. Must be at least zero.
-
-   alpha
-      Scaling factor for the solutions.
-
-   a
-      Buffer holding the input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      Leading dimension of the matrices ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if ``left_right`` =
-      ``side::right``. Must be positive.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   b
-      Buffer holding the input matrices ``B`` with size ``strideb`` * ``batch_size``.
-
-   ldb
-      Leading dimension of the matrices ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   strideb
-      Stride between different ``B`` matrices.
-
-   batch_size
-      Specifies the number of triangular linear systems to solve.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Output buffer, overwritten by ``batch_size`` solution matrices
-      ``X``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero and the matrices ``A``
-   and ``B`` do not need to be initialized before calling ``trsm_batch``.
-
-
-.. rubric:: Description
-
-The USM version of ``trsm_batch`` supports the group API and strided API. 
-
-The group API operation is defined as:
-::
-
-   idx = 0
-   for i = 0 … group_count – 1
-       for j = 0 … group_size – 1
-           A and B are matrices in a[idx] and b[idx]
-           if (left_right == onemkl::side::left) then
-               compute X such that op(A) * X = alpha[i] * B
-           else
-               compute X such that X * op(A) = alpha[i] * B
-           end if
-           B := X
-           idx = idx + 1
-       end for
-   end for     
-
-
-The strided API operation is defined as:
-::
-
-   for i = 0 … batch_size – 1
-       A and B are matrices at offset i * stridea and i * strideb in a and b.
-       if (left_right == onemkl::side::left) then
-           compute X such that op(A) * X = alpha * B
-       else
-           compute X such that X * op(A) = alpha * B
-       end if
-       B := X
-   end for
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(A) = ``A``\ :sup:`T`,
-or op(``A``) = ``A``\ :sup:`H`,
-
-``alpha`` is a scalar,
-
-``A`` is a triangular matrix,
-
-``B`` and ``X`` are ``m`` x ``n`` general matrices,
-
-``A`` is either ``m`` x ``m`` or ``n`` x ``n``, depending on whether
-it multiplies ``X`` on the left or right. On return, the matrix ``B``
-is overwritten by the solution matrix ``X``.
-
-For the group API, the ``a`` and ``b`` arrays contain the pointers to all the input matrices.
-The total number of matrices in ``a`` and ``b`` is given by:
- 
-.. math::
-      
-      total\_batch\_count = \sum_{i=0}^{group\_count-1}group\_size[i]
-
-For the strided API, the ``a`` and ``b`` arrays contain all the input matrices. The total number of matrices
-in ``a`` and ``b`` is given by the ``batch_size`` parameter.
-
-**Group API**
-
-.. rubric:: Syntax
-      
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trsm_batch(sycl::queue &queue,
-                              onemkl::side *left_right,
-                              onemkl::uplo *upper_lower,
-                              onemkl::transpose *trans,
-                              onemkl::diag *unit_diag,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              T **b,
-                              std::int64_t *ldb,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trsm_batch(sycl::queue &queue,
-                              onemkl::side *left_right,
-                              onemkl::uplo *upper_lower,
-                              onemkl::transpose *trans,
-                              onemkl::diag *unit_diag,
-                              std::int64_t *m,
-                              std::int64_t *n,
-                              T *alpha,
-                              const T **a,
-                              std::int64_t *lda,
-                              T **b,
-                              std::int64_t *ldb,
-                              std::int64_t group_count,
-                              std::int64_t *group_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Array of ``group_count`` ``onemkl::side`` values. ``left_right[i]`` specifies whether ``A`` multiplies
-      ``X`` on the left (``side::left``) or on the right
-      (``side::right``) for every ``trsm`` operation in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Array of ``group_count`` ``onemkl::uplo`` values. ``upper_lower[i]`` specifies whether ``A`` is upper or lower
-      triangular for every matrix in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Array of ``group_count`` ``onemkl::transpose`` values. ``trans[i]`` specifies the form of op(``A``) used
-      for every ``trsm`` operation in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Array of ``group_count`` ``onemkl::diag`` values. ``unit_diag[i]`` specifies whether ``A`` is assumed to
-      be unit triangular (all diagonal elements are 1) for every matrix in group ``i``. See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Array of ``group_count`` integers. ``m[i]`` specifies the
-      number of rows of ``B`` for every matrix in group ``i``. All entries must be at least zero.
-
-   n
-      Array of ``group_count`` integers. ``n[i]`` specifies the
-      number of columns of ``B`` for every matrix in group ``i``. All entries must be at least zero.
-
-   alpha
-      Array of ``group_count`` scalar elements. ``alpha[i]`` specifies the scaling factor in group ``i``.
-
-   a
-      Array of pointers to input matrices ``A`` with size ``total_batch_count``. See :ref:`matrix-storage` for more details.
-
-   lda
-      Array of ``group_count`` integers. ``lda[i]`` specifies the leading dimension of ``A`` for every matrix in group ``i``. 
-      All entries must be at least ``m``
-      if ``left_right`` is ``side::left``, and at least 
-      ``n`` if ``left_right`` is ``side::right``. All entries must be positive.
-
-   b
-      Array of pointers to input matrices ``B`` with size ``total_batch_count``. See :ref:`matrix-storage` for more details.
-
-   ldb
-      Array of ``group_count`` integers. ``ldb[i]`` specifies the
-      leading dimension of ``B`` for every matrix in group ``i``.  All
-      entries must be positive and at least ``m`` if
-      column major layout is used to store matrices or at least ``n``
-      if row major layout is used to store matrices.
-
-   group_count
-      Specifies the number of groups. Must be at least 0.
-
-   group_size
-      Array of ``group_count`` integers. ``group_size[i]`` specifies the
-      number of ``trsm`` operations in group ``i``. All entries must be at least 0.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Output buffer, overwritten by the ``total_batch_count`` solution
-      matrices ``X``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero and the matrices ``A``
-   and ``B`` do not need to be initialized before calling ``trsm_batch``.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trsm_batch(sycl::queue &queue,
-                              onemkl::side left_right,
-                              onemkl::uplo upper_lower,
-                              onemkl::transpose trans,
-                              onemkl::diag unit_diag,
-                              std::int64_t m,
-                              std::int64_t n,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              T *b,
-                              std::int64_t ldb,
-                              std::int64_t strideb,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trsm_batch(sycl::queue &queue,
-                              onemkl::side left_right,
-                              onemkl::uplo upper_lower,
-                              onemkl::transpose trans,
-                              onemkl::diag unit_diag,
-                              std::int64_t m,
-                              std::int64_t n,
-                              T alpha,
-                              const T *a,
-                              std::int64_t lda,
-                              std::int64_t stridea,
-                              T *b,
-                              std::int64_t ldb,
-                              std::int64_t strideb,
-                              std::int64_t batch_size,
-                              const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   left_right
-      Specifies whether the matrices ``A`` multiply ``X`` on the left
-      (``side::left``) or on the right (``side::right``). See :ref:`onemkl_datatypes` for more details.
-
-   upper_lower
-      Specifies whether the matrices ``A`` are upper or lower
-      triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to the
-      matrices ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_diag
-      Specifies whether the matrices ``A`` are assumed to be unit
-      triangular (all diagonal elements are 1). See :ref:`onemkl_datatypes` for more details.
-
-   m
-      Number of rows of the ``B`` matrices. Must be at least zero.
-
-   n
-      Number of columns of the ``B`` matrices. Must be at least zero.
-
-   alpha
-      Scaling factor for the solutions.
-
-   a
-      Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``.
-
-   lda
-      Leading dimension of the matrices ``A``. Must be at least ``m`` if
-      ``left_right`` = ``side::left``, and at least ``n`` if ``left_right`` =
-      ``side::right``. Must be positive.
-
-   stridea
-      Stride between different ``A`` matrices.
-
-   b
-      Pointer to input matrices ``B`` with size ``strideb`` * ``batch_size``.
-
-   ldb
-      Leading dimension of the matrices ``B``. It must be positive and at least
-      ``m`` if column major layout is used to store matrices or at
-      least ``n`` if row major layout is used to store matrices.
-
-   strideb
-      Stride between different ``B`` matrices. 
-
-   batch_size
-      Specifies the number of triangular linear systems to solve.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   b
-      Output matrices, overwritten by ``batch_size`` solution matrices
-      ``X``.
-
-.. container:: section
-
-   .. rubric:: Notes
-
-   If ``alpha`` = 0, matrix ``B`` is set to zero and the matrices ``A``
-   and ``B`` do not need to be initialized before calling ``trsm_batch``.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-like-extensions`
diff --git a/docs/domains/blas/trsv.rst b/docs/domains/blas/trsv.rst
deleted file mode 100644
index 1a30dad8d..000000000
--- a/docs/domains/blas/trsv.rst
+++ /dev/null
@@ -1,215 +0,0 @@
-.. _onemkl_blas_trsv:
-
-trsv
-====
-
-Solves a system of linear equations whose coefficients are in a
-triangular matrix.
-
-.. _onemkl_blas_trsv_description:
-
-.. rubric:: Description
-
-The ``trsv`` routines solve a system of linear equations whose
-coefficients are in a triangular matrix. The operation is defined as:
-
-.. math::
-
-      op(A)*x = b
-
-where:
-
-op(``A``) is one of op(``A``) = ``A``, or op(``A``) =
-``A``\ :sup:`T`, or op(``A``) = ``A``\ :sup:`H`,
-
-``A`` is an ``n``-by-``n`` unit or non-unit, upper or lower
-triangular matrix,
-
-``b`` and ``x`` are vectors of length ``n``.
-
-``trsv`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_blas_trsv_buffer:
-
-trsv (Buffer Version)
----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       void trsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       void trsv(sycl::queue &queue,
-                 onemkl::uplo upper_lower,
-                 onemkl::transpose trans,
-                 onemkl::diag unit_nonunit,
-                 std::int64_t n,
-                 std::int64_t k,
-                 sycl::buffer<T,1> &a,
-                 std::int64_t lda,
-                 sycl::buffer<T,1> &x,
-                 std::int64_t incx)
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Buffer holding input matrix ``A``. Must have size at least
-      ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Buffer holding the ``n``-element right-hand side vector ``b``. The
-      buffer must be of size at least (1 + (``n`` - 1)*abs(``incx``)).
-      See :ref:`matrix-storage` for more details.
-
-   incx
-      Stride of vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Buffer holding the solution vector ``x``.
-
-      
-
-.. _onemkl_blas_trsv_usm:
-
-trsv (USM Version)
-------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::column_major {
-       sycl::event trsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-.. code-block:: cpp
-
-   namespace oneapi::mkl::blas::row_major {
-       sycl::event trsv(sycl::queue &queue,
-                        onemkl::uplo upper_lower,
-                        onemkl::transpose trans,
-                        onemkl::diag unit_nonunit,
-                        std::int64_t n,
-                        std::int64_t k,
-                        const T *a,
-                        std::int64_t lda,
-                        T *x,
-                        std::int64_t incx,
-                        const std::vector<sycl::event> &dependencies = {})
-   }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-   queue
-      The queue where the routine should be executed.
-
-   upper_lower
-      Specifies whether ``A`` is upper or lower triangular. See :ref:`onemkl_datatypes` for more details.
-
-   trans
-      Specifies op(``A``), the transposition operation applied to
-      ``A``. See :ref:`onemkl_datatypes` for more details.
-
-   unit_nonunit
-      Specifies whether the matrix ``A`` is unit triangular or not. See :ref:`onemkl_datatypes` for more details.
-
-   n
-      Number of rows and columns of ``A``. Must be at least zero.
-
-   a
-      Pointer to input matrix ``A``. The array holding input matrix
-      ``A`` must have size at least ``lda``\ \*\ ``n``. See :ref:`matrix-storage` for
-      more details.
-
-   lda
-      Leading dimension of matrix ``A``. Must be at least ``n``, and
-      positive.
-
-   x
-      Pointer to the ``n``-element right-hand side vector ``b``. The
-      array holding the ``n``-element right-hand side vector ``b``
-      must be of size at least (1 + (``n`` - 1)*abs(``incx``)). See
-      :ref:`matrix-storage` for more details.
-
-   incx
-      Stride of vector ``x``.
-
-   dependencies
-      List of events to wait for before starting computation, if any.
-      If omitted, defaults to no dependencies.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-   x
-      Pointer to the solution vector ``x``.
-
-.. container:: section
-
-   .. rubric:: Return Values
-
-   Output event to wait on to ensure computation is complete.
-
-
-   **Parent topic:** :ref:`blas-level-2-routines`
diff --git a/docs/domains/dense_linear_algebra.rst b/docs/domains/dense_linear_algebra.rst
deleted file mode 100644
index 6544b9074..000000000
--- a/docs/domains/dense_linear_algebra.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-.. _onemkl_dense_linear_algebra:
-
-Dense Linear Algebra
----------------------
-
-This section contains information about dense linear algebra routines:
-
-:ref:`matrix-storage` provides information about dense matrix and vector storage formats that are used by oneMKL :ref:`onemkl_blas` and :ref:`onemkl_lapack`.
-
-:ref:`onemkl_blas` provides vector, matrix-vector, and matrix-matrix routines for dense matrices and vector operations.
-
-:ref:`onemkl_lapack` provides more complex dense linear algebra routines, e.g., matrix factorization, solving dense systems of linear equations, least square problems, eigenvalue and singular value problems, and performing a number of related computational tasks.
-
-.. toctree::
-    :hidden:
-
-    matrix-storage.rst
-    blas/blas.rst
-    lapack/lapack.rst
diff --git a/docs/domains/lapack/gebrd.rst b/docs/domains/lapack/gebrd.rst
deleted file mode 100644
index 7e014f97f..000000000
--- a/docs/domains/lapack/gebrd.rst
+++ /dev/null
@@ -1,230 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gebrd:
-
-gebrd
-=====
-
-Reduces a general matrix to bidiagonal form.
-
-.. container:: section
-
-    .. rubric:: Description
-
-``gebrd`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine reduces a general :math:`m \times n` matrix :math:`A` to a 
-bidiagonal matrix :math:`B` by an orthogonal (unitary) transformation.
-
-
-If :math:`m \ge n`, the reduction is given by :math:`A=QBP^H=Q\begin{pmatrix}B_1\\0\end{pmatrix}P^H=Q_1B_1P^H`
-
-where :math:`B_{1}` is an :math:`n \times n` upper bidiagonal matrix,
-:math:`Q` and :math:`P` are orthogonal or, for a complex :math:`A`, unitary
-matrices; :math:`Q_{1}` consists of the first :math:`n` columns of
-:math:`Q`.
-
-If :math:`m < n`, the reduction is given by
-
-:math:`A = QBP^H = Q\begin{pmatrix}B_1 & 0\end{pmatrix}P^H = QB_1P_1^H`,
-
-where :math:`B_{1}` is an :math:`m \times m` lower bidiagonal matrix,
-:math:`Q` and :math:`P` are orthogonal or, for a complex :math:`A`, unitary
-matrices; :math:`P_{1}` consists of the first :math:`m` columns of
-:math:`P`.
-
-The routine does not form the matrices :math:`Q` and :math:`P` explicitly,
-but represents them as products of elementary reflectors. Routines
-are provided to work with the matrices :math:`Q` and :math:`P` in this
-representation:
-
-If the matrix :math:`A` is real,
-
--  to compute :math:`Q` and :math:`P` explicitly, call
-   :ref:`onemkl_lapack_orgbr`.
-
-If the matrix :math:`A` is complex,
-
--  to compute :math:`Q` and :math:`P` explicitly, call
-   :ref:`onemkl_lapack_ungbr`
-
-gebrd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<realT,1> &d, sycl::buffer<realT,1> &e, sycl::buffer<T,1> &tauq, sycl::buffer<T,1> &taup, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The buffer :math:`a`, size (``lda,*``). The buffer ``a`` contains the
-   matrix :math:`A`. The second dimension of ``a`` must be at least
-   :math:`\max(1, m)`.
-
-lda
-   The leading dimension of :math:`a`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_gebrd_scratchpad_size` function.
-
-.. container:: section
-
-    .. rubric:: Output Parameters
-
-a
-   If :math:`m \ge n`, the diagonal and first super-diagonal of a are
-   overwritten by the upper bidiagonal matrix :math:`B`. The elements
-   below the diagonal, with the buffer tauq, represent the orthogonal
-   matrix :math:`Q` as a product of elementary reflectors, and the
-   elements above the first superdiagonal, with the buffer ``taup``,
-   represent the orthogonal matrix :math:`P` as a product of elementary
-   reflectors.
-
-   If :math:`m<n`, the diagonal and first sub-diagonal of a are
-   overwritten by the lower bidiagonal matrix :math:`B`. The elements
-   below the first subdiagonal, with the buffer tauq, represent the
-   orthogonal matrix :math:`Q` as a product of elementary reflectors, and
-   the elements above the diagonal, with the buffer ``taup``, represent
-   the orthogonal matrix :math:`P` as a product of elementary reflectors.
-
-d
-   Buffer, size at least :math:`\max(1, \min(m,n))`. Contains the diagonal
-   elements of :math:`B`.
-
-e
-   Buffer, size at least :math:`\max(1, \min(m,n) - 1)`. Contains the
-   off-diagonal elements of :math:`B`.
-
-tauq
-   Buffer, size at least :math:`\max(1, \min(m, n))`. The scalar factors of
-   the elementary reflectors which represent the orthogonal or
-   unitary matrix :math:`Q`.
-
-taup
-   Buffer, size at least :math:`\max(1, \min(m, n))`. The scalar factors of
-   the elementary reflectors which represent the orthogonal or
-   unitary matrix :math:`P`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-gebrd (USM Version)
--------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, RealT *d, RealT *e, T *tauq, T *taup, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-    .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to matrix :math:`A`. The second dimension of ``a`` must be at least
-   :math:`\max(1, m)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type T.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_gebrd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-    .. rubric:: Output Parameters
-
-a
-   If :math:`m \ge n`, the diagonal and first super-diagonal of a are
-   overwritten by the upper bidiagonal matrix :math:`B`. The elements
-   below the diagonal, with the array tauq, represent the orthogonal
-   matrix :math:`Q` as a product of elementary reflectors, and the
-   elements above the first superdiagonal, with the array ``taup``,
-   represent the orthogonal matrix :math:`P` as a product of elementary
-   reflectors.
-
-   If :math:`m<n`, the diagonal and first sub-diagonal of a are
-   overwritten by the lower bidiagonal matrix :math:`B`. The elements
-   below the first subdiagonal, with the array tauq, represent the
-   orthogonal matrix :math:`Q` as a product of elementary reflectors, and
-   the elements above the diagonal, with the array ``taup``, represent
-   the orthogonal matrix :math:`P` as a product of elementary reflectors.
-
-d
-   Pointer to memory of size at least :math:`\max(1, \min(m,n))`. Contains the diagonal
-   elements of :math:`B`.
-
-e
-   Pointer to memory of size at least :math:`\max(1, \min(m,n) - 1)`. Contains the
-   off-diagonal elements of :math:`B`.
-
-tauq
-   Pointer to memory of size at least :math:`\max(1, \min(m, n))`. The scalar factors of
-   the elementary reflectors which represent the orthogonal or
-   unitary matrix :math:`Q`.
-
-taup
-   Pointer to memory of size at least :math:`\max(1, \min(m, n))`. The scalar factors of
-   the elementary reflectors which represent the orthogonal or
-   unitary matrix :math:`P`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-    .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/gebrd_scratchpad_size.rst b/docs/domains/lapack/gebrd_scratchpad_size.rst
deleted file mode 100644
index 954c8b032..000000000
--- a/docs/domains/lapack/gebrd_scratchpad_size.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gebrd_scratchpad_size:
-
-gebrd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_gebrd` function.
-
-.. rubric:: Description
-
-``gebrd_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>``
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gebrd` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_gebrd` function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-   .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gebrd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/geqrf.rst b/docs/domains/lapack/geqrf.rst
deleted file mode 100644
index 31a2c97ce..000000000
--- a/docs/domains/lapack/geqrf.rst
+++ /dev/null
@@ -1,157 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_geqrf:
-
-geqrf
-=====
-
-Computes the QR factorization of a general :math:`m \times n` matrix.
-
-.. rubric:: Description
-
-``geqrf`` supports the following precisions:
-
-.. list-table:: 
-   :header-rows: 1
-
-   * -  T 
-   * -  ``float`` 
-   * -  ``double`` 
-   * -  ``std::complex<float>`` 
-   * -  ``std::complex<double>`` 
-
-The routine forms the QR factorization of a general
-:math:`m \times n` matrix :math:`A`. No pivoting is performed.
-
-The routine does not form the matrix :math:`Q` explicitly. Instead, :math:`Q`
-is represented as a product of :math:`\min(m, n)` elementary
-reflectors. Routines are provided to work with :math:`Q` in this
-representation.
-
-geqrf (Buffer Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-    .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in :math:`A` (:math:`0 \le n`).
-
-a
-   Buffer holding input matrix :math:`A`. Must have size at least
-   :math:`\text{lda} \cdot n`.
-
-lda
-   The leading dimension of :math:`A`; at least :math:`\max(1, m)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_geqrf_scratchpad_size` function.
-
-.. container:: section
-
-    .. rubric:: Output Parameters
-
-a
-   Output buffer, overwritten by the factorization data as follows:
-
-   The elements on and above the diagonal of the array contain the
-   :math:`\min(m,n) \times n` upper trapezoidal matrix :math:`R` (:math:`R` is upper
-   triangular if :math:`m \ge n`); the elements below the diagonal, with the
-   array tau, represent the orthogonal matrix :math:`Q` as a product of
-   :math:`\min(m,n)` elementary reflectors.
-
-tau
-   Output buffer, size at least :math:`\max(1, \min(m, n))`. Contains scalars
-   that define elementary reflectors for the matrix :math:`Q` in its
-   decomposition in a product of elementary reflectors.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-geqrf (USM Version)
-----------------------
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-    .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to memory holding input matrix :math:`A`. Must have size at least
-   :math:`\text{lda} \cdot n`.
-
-lda
-   The leading dimension of :math:`A`; at least :math:`\max(1, m)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_geqrf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-
-.. container:: section
-
-    .. rubric:: Output Parameters
-
-a
-   Overwritten by the factorization data as follows:
-
-   The elements on and above the diagonal of the array contain the
-   :math:`\min(m,n) \times n` upper trapezoidal matrix :math:`R` (:math:`R` is upper
-   triangular if :math:`m \ge n`); the elements below the diagonal, with the
-   array tau, represent the orthogonal matrix :math:`Q` as a product of
-   :math:`\min(m,n)` elementary reflectors.
-
-tau
-   Array, size at least :math:`\max(1, \min(m, n))`. Contains scalars
-   that define elementary reflectors for the matrix :math:`Q` in its
-   decomposition in a product of elementary reflectors.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-    .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/geqrf_batch.rst b/docs/domains/lapack/geqrf_batch.rst
deleted file mode 100644
index 12581248c..000000000
--- a/docs/domains/lapack/geqrf_batch.rst
+++ /dev/null
@@ -1,239 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_geqrf_batch:
-
-geqrf_batch
-===========
-
-Computes the QR factorizations of a batch of general matrices.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``geqrf_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_geqrf_batch_buffer:
-
-geqrf_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``geqrf_batch`` supports only the strided API. 
- 
-**Strided API**
-
-.. container:: section
-
-   .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<T> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue  
-   Device queue where calculations will be performed.
- 
-m
-   Number of rows in matrices :math:`A_i` (:math:`0 \le m`).
-
-n  
-   Number of columns in matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-   Array holding input matrices :math:`A_i`. 
-
-lda
-   Leading dimension of matrices :math:`A_i`.
-
-stride_a
-   Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_tau
-   Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-   Number of problems in a batch.
-
-scratchpad
-   Scratchpad memory to be used by routine for storing intermediate results.
-         
-scratchpad_size
-   Size of scratchpad memory as the number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_scratchpad_size` function.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
- 
-a
-  Factorization data as follows: The elements on and above the diagonal of :math:`A_i` contain the :math:`\min(m,n) \times n` upper trapezoidal matrices :math:`R_i` (:math:`R_i` is upper triangular if :math:`m \ge n`); the elements below the diagonal, with the array :math:`\tau_i`, contain the orthogonal matrix :math:`Q_i` as a product of :math:`\min(m,n)` elementary reflectors.
-
-tau 
-    Array to store batch of :math:`\tau_i`, each of size :math:`\min(m,n)`, containing scalars that define elementary reflectors for the matrices :math:`Q_i` in its decomposition in a product of elementary reflectors.
-
-geqrf_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``geqrf_batch`` supports the group API and strided API. 
-
-**Group API**
-
-The routine forms the :math:`Q_iR_i` factorizations of general :math:`m \times n` matrices :math:`A_i`, :math:`i \in \{1...batch\_size\}`, where ``batch_size`` is the sum of all parameter group sizes as provided by the ``group_sizes`` array.
-No pivoting is performed during factorization.
-The routine does not form the matrices :math:`Q_i` explicitly. Instead, :math:`Q_i` is represented as a product of :math:`\min(m,n)` elementary reflectors. Routines are provided to work with :math:`Q_i` in this representation.
-The total number of problems to solve, ``batch_size``, is a sum of sizes of all of the groups of parameters as provided by ``group_sizes`` array.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, T **tau, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue 
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` :math:`m_g` parameters. Each :math:`m_g` specifies the number of rows in matrices :math:`A_i` from array ``a``, belonging to group :math:`g`.
-
-n 
-  Array of ``group_count`` :math:`n_g` parameters.
-  Each :math:`n_g` specifies the number of columns in matrices :math:`A_i` from array ``a``, belonging to group :math:`g`.
-
-a  
-  Array of ``batch_size`` pointers to input matrices :math:`A_i`, each of size :math:`\text{lda}_g\cdot n_g` (:math:`g` is an index of group to which :math:`A_i` belongs)
-
-lda
-  Array of ``group_count`` :math:`\text{lda}_g` parameters, each representing the leading dimension of input matrices :math:`A_i` from array ``a``, belonging to group :math:`g`.
-
-group_count
-  Specifies the number of groups of parameters. Must be at least 0.
-
-group_sizes 
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as the number of floating point elements of type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_geqrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  Factorization data as follows: The elements on and above the diagonal of :math:`A_i` contain the :math:`\min(m_g,n_g) \times n_g` upper trapezoidal matrices :math:`R_i` (:math:`R_i` is upper triangular if :math:`m_g \ge n_g`); the elements below the diagonal, with the array :math:`\tau_i`, contain the orthogonal matrix :math:`Q_i` as a product of :math:`\min(m_g,n_g)` elementary reflectors. Here :math:`g` is the index of the parameters group corresponding to the :math:`i`-th decomposition.
-
-tau
-  Array of pointers to store arrays :math:`\tau_i`, each of size :math:`\min(m_g,n_g)`, containing scalars that define elementary reflectors for the matrices :math:`Q_i` in its decomposition in a product of elementary reflectors. Here :math:`g` is the index of the parameters group corresponding to the :math:`i`-th decomposition.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-The routine forms the :math:`Q_iR_i` factorizations of general :math:`m \times n` matrices :math:`A_i`. No pivoting is performed.
-The routine does not form the matrices :math:`Q_i` explicitly. Instead, :math:`Q_i` is represented as a product of :math:`\min(m,n)` elementary reflectors. Routines are provided to work with :math:`Q_i` in this representation.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, T *tau, std::int64_t stride_tau, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m 
-  Number of rows in matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-  Array holding input matrices :math:`A_i`.
-
-lda
-  Leading dimensions of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as the number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  Factorization data as follows: The elements on and above the diagonal of :math:`A_i` contain the :math:`\min(m,n) \times n` upper trapezoidal matrices :math:`R_i` (:math:`R_i` is upper triangular if :math:`m \ge n`); the elements below the diagonal, with the array :math:`\tau_i`, contain the orthogonal matrix :math:`Q_i` as a product of :math:`\min(m,n)` elementary reflectors.
-
-tau
-  Array to store batch of :math:`\tau_i`, each of size :math:`\min(m,n)`, containing scalars that define elementary reflectors for the matrices :math:`Q_i` in its decomposition in a product of elementary reflectors.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
diff --git a/docs/domains/lapack/geqrf_batch_scratchpad_size.rst b/docs/domains/lapack/geqrf_batch_scratchpad_size.rst
deleted file mode 100644
index bea681f3d..000000000
--- a/docs/domains/lapack/geqrf_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,111 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_geqrf_batch_scratchpad_size:
-
-geqrf_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_geqrf_batch` function.
-
-.. rubric:: Description
-
-``geqrf_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_geqrf_batch` function.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-m
- | Array of ``group_count`` :math:`m_g` parameters.
- | Each of :math:`m_g` specifies the number of rows in the matrices :math:`A_i` belonging to group :math:`g`.
-
-n
- | Array of ``group_count`` :math:`n_g` parameters.
- | Each of :math:`n_g` specifies the number of columns in the matrices :math:`A_i` belonging to group :math:`g`.
-
-lda
-  Array of ``group_count`` :math:`lda_g` parameters, each representing the leading dimensions of input matrices belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_geqrf_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_geqrf_batch` function.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in :math:`A_i` (:math:`0 \le n`).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_geqrf_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
diff --git a/docs/domains/lapack/geqrf_scratchpad_size.rst b/docs/domains/lapack/geqrf_scratchpad_size.rst
deleted file mode 100644
index 8541bc724..000000000
--- a/docs/domains/lapack/geqrf_scratchpad_size.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_geqrf_scratchpad_size:
-
-geqrf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_geqrf` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``geqrf_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>``
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_geqrf` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_geqrf` function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_geqrf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/gerqf.rst b/docs/domains/lapack/gerqf.rst
deleted file mode 100644
index 7f072eba3..000000000
--- a/docs/domains/lapack/gerqf.rst
+++ /dev/null
@@ -1,148 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gerqf:
-
-gerqf
-=====
-
-Computes the RQ factorization of a general :math:`m \times n` matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``gerqf`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>``
-
-The routine forms the RQ factorization of a general :math:`m \times n` matrix :math:`A`. No pivoting is performed.
-The routine does not form the matrix :math:`Q` explicitly. Instead, :math:`Q` is represented as a product of :math:`\min(m, n)` elementary reflectors. Routines are provided to work with :math:`Q` in this representation.
-
-gerqf (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T> &a, std::int64_t lda, sycl::buffer<T> &tau, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations will be performed.
-   
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-   
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-   
-a
-   Buffer holding input matrix :math:`A`. The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-   
-lda
-   The leading dimension of ``a``, at least :math:`\max(1, m)`.
-      
-scratchpad
-   Buffer holding scratchpad memory to be used by the routine for storing intermediate results.
-   
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the :ref:`onemkl_lapack_gerqf_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Output buffer, overwritten by the factorization data as follows:
-
-   If :math:`m \le n`, the upper triangle of the subarray ``a(1:m, n-m+1:n)`` contains the :math:`m \times m` upper triangular matrix :math:`R`; if :math:`m \ge n`, the elements on and above the :math:`(m-n)`-th subdiagonal contain the :math:`m \times n` upper trapezoidal matrix :math:`R`.
-
-   In both cases, the remaining elements, with the array ``tau``, represent the orthogonal/unitary matrix :math:`Q` as a product of :math:`\min(m,n)` elementary reflectors.
-
-tau
-   Array, size at least :math:`\min(m,n)`.
-
-   Contains scalars that define elementary reflectors for the matrix :math:`Q` in its decomposition in a product of elementary reflectors.
-
-gerqf (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations will be performed.
-   
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-   
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-   
-a
-   Pointer to memory holding input matrix :math:`A`. The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-   
-lda
-   The leading dimension of ``a``, at least :math:`\max(1, m)`.
-      
-scratchpad
-   Pointer to scratchpad memory to be used by the routine for storing intermediate results.
-   
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the :ref:`onemkl_lapack_gerqf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the factorization data as follows:
-
-   If :math:`m \le n`, the upper triangle of the subarray ``a(1:m, n-m+1:n)`` contains the :math:`m \times m` upper triangular matrix :math:`R`; if :math:`m \ge n`, the elements on and above the :math:`(m-n)`-th subdiagonal contain the :math:`m \times n` upper trapezoidal matrix :math:`R`.
-
-   In both cases, the remaining elements, with the array ``tau``, represent the orthogonal/unitary matrix :math:`Q` as a product of :math:`\min(m,n)` elementary reflectors.
-
-tau
-   Array, size at least :math:`\min(m,n)`.
-
-   Contains scalars that define elementary reflectors for the matrix :math:`Q` in its decomposition in a product of elementary reflectors.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/gerqf_scratchpad_size.rst b/docs/domains/lapack/gerqf_scratchpad_size.rst
deleted file mode 100644
index f35d02ef6..000000000
--- a/docs/domains/lapack/gerqf_scratchpad_size.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gerqf_scratchpad_size:
-
-gerqf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_gerqf` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``gerqf_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gerqf` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-gerqf_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by the gerqf (buffer or USM version) function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1,m)`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gerqf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/gesvd.rst b/docs/domains/lapack/gesvd.rst
deleted file mode 100644
index f778d7a73..000000000
--- a/docs/domains/lapack/gesvd.rst
+++ /dev/null
@@ -1,344 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gesvd:
-
-gesvd
-=====
-
-Computes the singular value decomposition of a general rectangular matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``gesvd`` supports the following precisions.
-
-    .. list-table::
-       :header-rows: 1
-
-       * -  T
-       * -  ``float``
-       * -  ``double``
-       * -  ``std::complex<float>``
-       * -  ``std::complex<double>``
-
-.. _onemkl_lapack_gesvd_batch_buffer:
-
-gesvd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The routine computes the singular value decomposition (SVD) of a
-real/complex :math:`m \times n` matrix :math:`A`, optionally computing the
-left and/or right singular vectors. The SVD is written as
-
-:math:`A = U\Sigma V^T` for real routines
-
-:math:`A = U\Sigma V^H` for complex routines
-
-where :math:`\Sigma` is an :math:`m \times n` diagonal matrix, :math:`U` is an
-:math:`m \times m` orthogonal/unitary matrix, and :math:`V` is an
-:math:`n \times n` orthogonal/unitary matrix. The diagonal elements of :math:`\Sigma`
-are the singular values of :math:`A`; they are real and non-negative, and
-are returned in descending order. The first :math:`\min(m, n)` columns of
-:math:`U` and :math:`V` are the left and right singular vectors of :math:`A`.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void gesvd(sycl::queue &queue, oneapi::mkl::job jobu, oneapi::mkl::job jobvt, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<realT,1> &s, sycl::buffer<T,1> &u, std::int64_t ldu, sycl::buffer<T,1> &vt, std::int64_t ldvt, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-jobu
-   Must be ``job::allvec``, ``job::somevec``, ``job::overwritevec``,
-   or ``job::novec``. Specifies options for computing all or part of
-   the matrix :math:`U`.
-
-   If ``jobu = job::allvec``, all :math:`m` columns of :math:`U` are returned
-   in the buffer ``u``;
-
-   if ``jobu = job::somevec``, the first :math:`\min(m, n)` columns of
-   :math:`U` (the left singular vectors) are returned in the buffer ``u``;
-
-   if ``jobu = job::overwritevec``, the first :math:`\min(m, n)` columns
-   of :math:`U` (the left singular vectors) are overwritten on the buffer
-   ``a``;
-
-   if ``jobu = job::novec``, no columns of :math:`U` (no left singular
-   vectors) are computed.
-
-jobvt
-   Must be ``job::allvec``, ``job::somevec``, ``job::overwritevec``, or
-   ``job::novec``. Specifies options for computing all or part of the
-   matrix :math:`V^T/V^H`.
-
-   If ``jobvt = job::allvec``, all :math:`n` rows of :math:`V^T/V^H` are
-   returned in the buffer ``vt``;
-
-   if ``jobvt = job::somevec``, the first :math:`\min(m, n)` rows of
-   :math:`V^T/V^H` (the right singular vectors) are returned in the buffer
-   ``vt``;
-
-   if ``jobvt = job::overwritevec``, the first :math:`\min(m, n)` rows
-   of :math:`V^T/V^H` (the right singular vectors) are overwritten on the
-   buffer ``a``;
-
-   if ``jobvt = job::novec``, no rows of :math:`V^T/V^H` (no right
-   singular vectors) are computed.
-
-   ``jobvt`` and ``jobu`` cannot both be ``job::overwritevec``.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-a
-   The buffer ``a``, size ``(lda,*)``. The buffer ``a`` contains the
-   matrix :math:`A`. The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-ldu
-   The leading dimension of ``u``.
-
-ldvt
-   The leading dimension of ``vt``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_gesvd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit,
-
-   If ``jobu = job::overwritevec``, ``a`` is overwritten with the first
-   :math:`\min(m,n)` columns of :math:`U` (the left singular vectors stored
-   columnwise);
-
-   If ``jobvt = job::overwritevec``, ``a`` is overwritten with the first
-   :math:`\min(m, n)` rows of :math:`V^{T}`/:math:`V^{H}` (the right
-   singular vectors stored rowwise);
-
-   If ``jobu`` :math:`\ne` ``job::overwritevec`` and ``jobvt`` :math:`\ne` ``job::overwritevec``,
-   the contents of ``a`` are destroyed.
-
-s
-   Buffer containing the singular values, size at least
-   :math:`\max(1, \min(m,n))`. Contains the singular values of :math:`A` sorted
-   so that :math:`s(i) \ge s(i+1)`.
-
-u
-   Buffer containing :math:`U`; the second dimension of ``u`` must be at
-   least :math:`\max(1, m)` if ``jobu = job::allvec``, and at least
-   :math:`\max(1, \min(m, n))` if ``jobu = job::somevec``.
-
-   If ``jobu = job::allvec``, ``u`` contains the :math:`m \times m`
-   orthogonal/unitary matrix :math:`U`.
-
-   If ``jobu = job::somevec``, ``u`` contains the first :math:`\min(m, n)`
-   columns of :math:`U` (the left singular vectors stored column-wise).
-
-   If ``jobu = job::novec`` or ``job::overwritevec``, ``u`` is not
-   referenced.
-
-vt
-   Buffer containing :math:`V^{T}`; the second dimension of ``vt`` must
-   be at least :math:`\max(1, n)`.
-
-   If ``jobvt = job::allvec``, ``vt`` contains the :math:`n \times n`
-   orthogonal/unitary matrix :math:`V^{T}`/:math:`V^{H}`.
-
-   If ``jobvt = job::somevec``, ``vt`` contains the first :math:`\min(m, n)`
-   rows of :math:`V^{T}`/:math:`V^{H}` (the right singular
-   vectors stored row-wise).
-
-   If ``jobvt = job::novec`` or ``job::overwritevec``, ``vt`` is not
-   referenced.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-gesvd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The routine computes the singular value decomposition (SVD) of a
-real/complex :math:`m \times n` matrix :math:`A`, optionally computing the
-left and/or right singular vectors. The SVD is written as
-
-:math:`A = U\Sigma V^T` for real routines
-
-:math:`A = U\Sigma V^H` for complex routines
-
-where :math:`\Sigma` is an :math:`m \times n` diagonal matrix, :math:`U` is an
-:math:`m \times m` orthogonal/unitary matrix, and :math:`V` is an
-:math:`n \times n` orthogonal/unitary matrix. The diagonal elements of :math:`\Sigma`
-are the singular values of :math:`A`; they are real and non-negative, and
-are returned in descending order. The first :math:`\min(m, n)` columns of
-:math:`U` and :math:`V` are the left and right singular vectors of :math:`A`.
-
-.. container:: section
-  
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event gesvd(sycl::queue &queue, oneapi::mkl::job jobu, oneapi::mkl::job jobvt, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, RealT *s, T *u, std::int64_t ldu, T *vt, std::int64_t ldvt, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-jobu
-   Must be ``job::allvec``, ``job::somevec``, ``job::overwritevec``,
-   or ``job::novec``. Specifies options for computing all or part of
-   the matrix :math:`U`.
-
-   If ``jobu = job::allvec``, all :math:`m` columns of :math:`U` are returned
-   in the array ``u``;
-
-   if ``jobu = job::somevec``, the first :math:`\min(m, n)` columns of
-   :math:`U` (the left singular vectors) are returned in the array ``u``;
-
-   if ``jobu = job::overwritevec``, the first :math:`\min(m, n)` columns
-   of :math:`U` (the left singular vectors) are overwritten on the array
-   ``a``;
-
-   if ``jobu = job::novec``, no columns of :math:`U` (no left singular
-   vectors) are computed.
-
-jobvt
-   Must be ``job::allvec``, ``job::somevec``, ``job::overwritevec``, or
-   ``job::novec``. Specifies options for computing all or part of the
-   matrix :math:`V^T/V^H`.
-
-   If ``jobvt = job::allvec``, all :math:`n` rows of :math:`V^T/V^H` are
-   returned in the array ``vt``;
-
-   if ``jobvt = job::somevec``, the first :math:`\min(m, n)` rows of
-   :math:`V^T/V^H` (the right singular vectors) are returned in the array
-   ``vt``;
-
-   if ``jobvt = job::overwritevec``, the first :math:`\min(m, n)` rows
-   of :math:`V^T/V^H` (the right singular vectors) are overwritten on the
-   array ``a``;
-
-   if ``jobvt = job::novec``, no rows of :math:`V^T/V^H` (no right
-   singular vectors) are computed.
-
-   ``jobvt`` and ``jobu`` cannot both be ``job::overwritevec``.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-a
-   Pointer to array ``a``, size ``(lda,*)``, containing the
-   matrix :math:`A`. The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-ldu
-   The leading dimension of ``u``.
-
-ldvt
-   The leading dimension of ``vt``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_gesvd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit,
-
-   If ``jobu = job::overwritevec``, ``a`` is overwritten with the first
-   :math:`\min(m,n)` columns of :math:`U` (the left singular vectors stored
-   columnwise);
-
-   If ``jobvt = job::overwritevec``, ``a`` is overwritten with the first
-   :math:`\min(m, n)` rows of :math:`V^{T}`/:math:`V^{H}` (the right
-   singular vectors stored rowwise);
-
-   If ``jobu`` :math:`\ne` ``job::overwritevec`` and ``jobvt`` :math:`\ne` ``job::overwritevec``,
-   the contents of ``a`` are destroyed.
-
-s
-   Array containing the singular values, size at least
-   :math:`\max(1, \min(m,n))`. Contains the singular values of :math:`A` sorted
-   so that :math:`s(i) \ge s(i+1)`.
-
-u
-   Array containing :math:`U`; the second dimension of ``u`` must be at
-   least :math:`\max(1, m)` if ``jobu = job::allvec``, and at least
-   :math:`\max(1, \min(m, n))` if ``jobu = job::somevec``.
-
-   If ``jobu = job::allvec``, ``u`` contains the :math:`m \times m`
-   orthogonal/unitary matrix :math:`U`.
-
-   If ``jobu = job::somevec``, ``u`` contains the first :math:`\min(m, n)`
-   columns of :math:`U` (the left singular vectors stored column-wise).
-
-   If ``jobu = job::novec`` or ``job::overwritevec``, ``u`` is not
-   referenced.
-
-vt
-   Array containing :math:`V^{T}`; the second dimension of ``vt`` must
-   be at least :math:`\max(1, n)`.
-
-   If ``jobvt = job::allvec``, ``vt`` contains the :math:`n \times n`
-   orthogonal/unitary matrix :math:`V^{T}`/:math:`V^{H}`.
-
-   If ``jobvt = job::somevec``, ``vt`` contains the first :math:`\min(m, n)`
-   rows of :math:`V^{T}`/:math:`V^{H}` (the right singular
-   vectors stored row-wise).
-
-   If ``jobvt = job::novec`` or ``job::overwritevec``, ``vt`` is not
-   referenced.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
diff --git a/docs/domains/lapack/gesvd_scratchpad_size.rst b/docs/domains/lapack/gesvd_scratchpad_size.rst
deleted file mode 100644
index fab43c61f..000000000
--- a/docs/domains/lapack/gesvd_scratchpad_size.rst
+++ /dev/null
@@ -1,111 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_gesvd_scratchpad_size:
-
-gesvd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_gesvd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``gesvd_scratchpad_size`` supports the following precisions.
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -  T 
-         * -  ``float`` 
-         * -  ``double`` 
-         * -  ``std::complex<float>`` 
-         * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gesvd` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-gesvd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobu, oneapi::mkl::job jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) 
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_gesvd` function will be performed.
-
-jobu
-   Must be ``job::allvec``, ``job::somevec``,
-   ``job::overwritevec``, or ``job::novec``. Specifies options for
-   computing all or part of the matrix :math:`U`.
-
-   If ``jobu = job::allvec``, all :math:`m` columns of :math:`U` are
-   returned in the buffer ``u``;
-
-   if ``jobu = job::somevec``, the first :math:`\min(m, n)` columns of
-   :math:`U` (the left singular vectors) are returned in the buffer ``u``;
-
-   if ``jobu = job::overwritevec``, the first :math:`\min(m, n)`
-   columns of :math:`U` (the left singular vectors) are overwritten on
-   the buffer ``a``;
-
-   if ``jobu = job::novec``, no columns of :math:`U` (no left singular
-   vectors) are computed.
-
-jobvt
-   Must be ``job::allvec``, ``job::somevec``,
-   ``job::overwritevec``, or ``job::novec``. Specifies options for
-   computing all or part of the matrix :math:`V^T/V^H`.
-
-   If ``jobvt = job::allvec``, all :math:`n` rows of :math:`V^T/V^H` are
-   returned in the buffer ``vt``;
-
-   if ``jobvt = job::somevec``, the first :math:`\min(m, n)` rows of
-   :math:`V^T/V^H` (the right singular vectors) are returned in the
-   buffer ``vt``;
-
-   if ``jobvt = job::overwritevec``, the first :math:`\min(m, n)`
-   rows of :math:`V^T/V^H` (the right singular vectors) are
-   overwritten on the buffer ``a``;
-
-   if ``jobvt = job::novec``, no rows of :math:`V^T/V^H` (no right
-   singular vectors) are computed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-ldu
-   The leading dimension of ``u``.
-
-ldvt
-   The leading dimension of ``vt``.
-
-.. container:: section
-
-   .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_gesvd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/getrf.rst b/docs/domains/lapack/getrf.rst
deleted file mode 100644
index d23594843..000000000
--- a/docs/domains/lapack/getrf.rst
+++ /dev/null
@@ -1,144 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrf:
-
-getrf
-=====
-
-Computes the LU factorization of a general :math:`m \times n` matrix.
-
-.. container:: section
-
-   .. rubric:: Description
-
-``getrf`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-The routine computes the LU factorization of a general
-:math:`m \times n` matrix :math:`A` as :math:`A = PLU`,
-
-where :math:`P` is a permutation matrix, :math:`L` is lower triangular with
-unit diagonal elements (lower trapezoidal if :math:`m > n`) and :math:`U` is
-upper triangular (upper trapezoidal if :math:`m < n`). The routine uses
-partial pivoting, with row interchanges.
-
-getrf (BUFFER Version)
-----------------------
-
-.. container:: section
-
-   .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<std::int64_t,1> &ipiv, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-    The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-    The number of columns in :math:`A` (:math:`0 \le n`).
-
-a
-   Buffer holding input matrix :math:`A`. The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-      Size of scratchpad memory as a number of floating point elements of type ``T``.
-      Size should not be less than the value returned by :ref:`onemkl_lapack_getrf_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`L` and :math:`U`. The unit diagonal    elements of :math:`L` are not stored.
-
-ipiv
-   Array, size at least :math:`\max(1,\min(m, n))`. Contains the    pivot indices; for :math:`1 \le i \le \min(m, n)`, row :math:`i` was interchanged with   row :math:`\text{ipiv}(i)`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-getrf (USM Version)
-----------------------
-
-.. container:: section
-
-   .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-    The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-    The number of columns in :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to array holding input matrix :math:`A`. The second dimension of ``a`` must be at least   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_getrf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`L` and :math:`U`. The unit diagonal    elements of :math:`L` are not stored.
-
-ipiv
-   Array, size at least :math:`\max(1,\min(m, n))`. Contains the    pivot indices; for :math:`1 \le i \le \min(m, n)`, row :math:`i` was interchanged with   row :math:`\text{ipiv}(i)`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/getrf_batch.rst b/docs/domains/lapack/getrf_batch.rst
deleted file mode 100644
index 502707ee5..000000000
--- a/docs/domains/lapack/getrf_batch.rst
+++ /dev/null
@@ -1,226 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrf_batch:
-
-getrf_batch
-===========
-
-Computes the LU factorizations of a batch of general matrices.
-
-.. rubric:: Description
-
-``getrf_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_getrf_batch_buffer:
-
-getrf_batch (Buffer Version)
-----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``getrf_batch`` supports only the strided API. 
-
-**Strided API**
-
-The routine computes the LU factorizations of general :math:`m \times n` matrices :math:`A_i` as :math:`A_i = P_iL_iU_i`, where :math:`P_i` is a permutation matrix, :math:`L_i` is lower triangular with unit diagonal elements (lower trapezoidal if :math:`m > n`) and :math:`U_i` is upper triangular (upper trapezoidal if :math:`m < n`). The routine uses partial pivoting, with row interchanges.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t> &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-  Array holding input matrices :math:`A_i`.
-
-lda
-  Leading dimension of matrices :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_scratchpad_size` function.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  :math:`L_i` and :math:`U_i`. The unit diagonal elements of :math:`L_i` are not stored.
-
-ipiv
-  Array containing the batch of pivot indices :math:`\text{ipiv}_i`, each of size at least :math:`\max(1,\min(m,n))`; for :math:`1 \le k \le \min(m,n)`, row :math:`k` of :math:`A_i` was interchanged with row :math:`\text{ipiv}_i(k)`.
-
-.. _onemkl_lapack_getrf_batch_usm:
-
-getrf_batch (USM Version)
--------------------------
-
-.. rubric:: Description
-
-The USM version of ``getrf_batch`` supports the group API and strided API. 
-
-**Group API**
-
-The routine computes the batch of LU factorizations of general :math:`m \times n` matrices :math:`A_i` (:math:`i \in \{1...batch\_size\}`) as :math:`A_i = P_iL_iU_i`, where :math:`P_i` is a permutation matrix, :math:`L_i` is lower triangular with unit diagonal elements (lower trapezoidal if :math:`m > n`) and :math:`U_i` is upper triangular (upper trapezoidal if :math:`m < n`). The routine uses partial pivoting, with row interchanges. Total number of problems to solve, ``batch_size``, is a sum of sizes of all of the groups of parameters as provided by ``group_sizes`` array.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` parameters :math:`m_g` specifying the number of rows in matrices :math:`A_i` (:math:`0 \le m_g`) belonging to group :math:`g`.
-
-n
-  Array of ``group_count`` parameters :math:`n_g` specifying the number of columns in matrices :math:`A_i` (:math:`0 \le n_g`) belonging to group :math:`g`.
-
-a
-  Array holding ``batch_size`` pointers to input matrices :math:`A_i`.
-
-lda
-  Array of ``group_count`` parameters :math:`\text{lda}_g` specifying the leading dimensions of :math:`A_i` belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_getrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  :math:`L_i` and :math:`U_i`. The unit diagonal elements of :math:`L_i` are not stored.
-
-ipiv
-  Array of ``batch_size`` pointers to arrays containing pivot indices :math:`\text{ipiv}_i`, each of size at least :math:`\max(1,\min(m_g,n_g))`; for :math:`1 \le k \le \min(m_g,n_g)`, row :math:`k` of :math:`A_i` was interchanged with row :math:`\text{ipiv}_i(k)`.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-The routine computes the LU factorizations of general :math:`m \times n` matrices :math:`A_i` as :math:`A_i = P_iL_iU_i`, where :math:`P_i` is a permutation matrix, :math:`L_i` is lower triangular with unit diagonal elements (lower trapezoidal if :math:`m > n`) and :math:`U_i` is upper triangular (upper trapezoidal if :math:`m < n`). The routine uses partial pivoting, with row interchanges.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-  Array holding input matrices :math:`A_i`.
-
-lda
-  Leading dimension of matrices :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  :math:`L_i` and :math:`U_i`. The unit diagonal elements of :math:`L_i` are not stored.
-
-ipiv
-  Array containing the batch of pivot indices :math:`\text{ipiv}_i`, each of size at least :math:`\max(1,\min(m,n))`; for :math:`1 \le k \le \min(m,n)`, row :math:`k` of :math:`A_i` was interchanged with row :math:`\text{ipiv}_i(k)`.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
diff --git a/docs/domains/lapack/getrf_batch_scratchpad_size.rst b/docs/domains/lapack/getrf_batch_scratchpad_size.rst
deleted file mode 100644
index 27cfdb4da..000000000
--- a/docs/domains/lapack/getrf_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,117 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrf_batch_scratchpad_size:
-
-getrf_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_getrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getrf_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` parameters :math:`m_g` specifying the number of rows in the matrices belonging to group :math:`g`.
-
-n
-  Array of ``group_count`` parameters :math:`n_g` specifying the number of columns in matrices belonging to group :math:`g`.
-
-lda
-  Array of ``group_count`` parameters :math:`\text{lda}_g` specifying the leading dimensions of matrices belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getrf_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in :math:`A_i` (:math:`0 \le n`).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch  array ``a``.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getrf_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/getrf_scratchpad_size.rst b/docs/domains/lapack/getrf_scratchpad_size.rst
deleted file mode 100644
index ae382d0fc..000000000
--- a/docs/domains/lapack/getrf_scratchpad_size.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrf_scratchpad_size:
-
-getrf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_getrf` function.
-
-.. container:: section
-
-   .. rubric:: Description
-
-``getrf_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getrf` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-getrf_scratchpad_size
----------------------
-
-.. container:: section
-
-   .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_getrf` function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a`` :math:`(m \le \text{lda})`.
-
-.. container:: section
-
-   .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getrf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/getri.rst b/docs/domains/lapack/getri.rst
deleted file mode 100644
index d0f1854e8..000000000
--- a/docs/domains/lapack/getri.rst
+++ /dev/null
@@ -1,138 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getri:
-
-getri
-=====
-
-Computes the inverse of an LU-factored general matrix determined by
-:ref:`onemkl_lapack_getrf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getri`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine computes the inverse :math:`A^{-1}` of a general matrix
-:math:`A`. Before calling this routine, call :ref:`onemkl_lapack_getrf`
-to factorize :math:`A`.
-
-getri (BUFFER Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getri(sycl::queue &queue, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<std::int64_t,1> &ipiv, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   The buffer ``a`` as returned by :ref:`onemkl_lapack_getrf`. Must
-   be of size at least :math:`\text{lda} \cdot \max(1,n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-ipiv
-   The buffer as returned by :ref:`onemkl_lapack_getrf`. The
-   dimension of ``ipiv`` must be at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_getri_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the :math:`n \times n` inverse matrix :math:`A^{-1}`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-getri (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getri(sycl::queue &queue, std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   The array as returned by :ref:`onemkl_lapack_getrf`. Must
-   be of size at least :math:`\text{lda} \cdot \max(1,n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-ipiv
-   The array as returned by :ref:`onemkl_lapack_getrf`. The
-   dimension of ``ipiv`` must be at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_getri_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the :math:`n \times n` inverse matrix :math:`A^{-1}`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/getri_batch.rst b/docs/domains/lapack/getri_batch.rst
deleted file mode 100644
index 9112100a7..000000000
--- a/docs/domains/lapack/getri_batch.rst
+++ /dev/null
@@ -1,229 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getri_batch:
-
-getri_batch
-===========
-
-Computes the inverses of a batch of LU-factored matrices determined by :ref:`onemkl_lapack_getrf_batch`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getri_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_getri_batch_buffer:
-
-getri_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``getri_batch`` supports only the strided API. 
-
-**Strided API**
-
-The routine computes the inverses :math:`A_i^{-1}` of general matrices :math:`A_i`. Before calling this routine, call the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function to factorize :math:`A_i`.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t> &ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-n
-  Order of the matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-  Result of the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`n\le \text{lda}`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-ipiv
-  Arrays returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getri_batch_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-  Inverse :math:`n \times n` matrices :math:`A_i^{-1}`.
-
-getri_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``getri_batch`` supports the group API and strided API. 
-
-**Group API**
-
-The routine computes the inverses :math:`A_i^{-1}` of general matrices :math:`A_i`, :math:`i \in \{1...batch\_size\}`. Before calling this routine, call the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function to factorize :math:`A_i`.
-Total number of problems to solve, ``batch_size``, is a sum of sizes of all of the groups of parameters as provided by ``group_sizes`` array.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, T **a, std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters specifying the order of the matrices :math:`A_i` (:math:`0 \le n_g`) belonging to group :math:`g`.
-
-a
-  Result of the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-lda
-  Array of ``group_count`` :math:`\text{lda}_g` parameters specifying the leading dimensions of the matrices :math:`A_i` (:math:`n_g \le \text{lda}_g`) belonging to group :math:`g`.
-
-ipiv
-  Arrays returned by the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of  type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_getri_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-  Inverse :math:`n_g \times n_g` matrices :math:`A_i^{-1}`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
-The routine computes the inverses :math:`A_i^{-1}` of general matrices :math:`A_i`. Before calling this routine, call the Strided API of the :ref:`onemkl_lapack_getrf_batch_usm` function to factorize :math:`A_i`.
-
-.. container:: section
-   
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getri_batch(sycl::queue &queue, std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-n
-  Order of the matrices :math:`A_i` (:math:`0 \le n`).
-
-a
-  Result of the Strided API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`n \le \text{lda}`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-ipiv
-  Arrays returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size 
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getri_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-  Inverse :math:`n \times n` matrices :math:`A_i^{-1}`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/getri_batch_scratchpad_size.rst b/docs/domains/lapack/getri_batch_scratchpad_size.rst
deleted file mode 100644
index 3e30e4400..000000000
--- a/docs/domains/lapack/getri_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,111 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getri_batch_scratchpad_size:
-
-getri_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_getri_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getri_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getri_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-	Device queue where calculations will be performed.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters specifying the order of the matrices belonging to group :math:`g`.
-
-lda
-	Array of ``group_count`` :math:`\text{lda}_g` parameters specifying the leading dimensions of the matrices belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-	Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getri_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getri_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-	Device queue where calculations will be performed.
-
-n
-  The order of the matrices :math:`A_i` (:math:`0 \le n`).
-
-lda
-	Leading dimension of :math:`A_i` (:math:`n \le \text{lda}`).
-
-stride_a
-	Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_ipiv
-	Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-batch_size
-	Specifies the number of problems in a batch.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getri_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/getri_scratchpad_size.rst b/docs/domains/lapack/getri_scratchpad_size.rst
deleted file mode 100644
index 734f51a69..000000000
--- a/docs/domains/lapack/getri_scratchpad_size.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getri_scratchpad_size:
-
-getri_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_getri` function.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``getri_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getri` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-getri_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-      
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_getri` function will be performed.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-     
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getri` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines` 
-
-
diff --git a/docs/domains/lapack/getrs.rst b/docs/domains/lapack/getrs.rst
deleted file mode 100644
index f96ca356a..000000000
--- a/docs/domains/lapack/getrs.rst
+++ /dev/null
@@ -1,200 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrs:
-
-getrs
-=====
-
-Solves a system of linear equations with an LU-factored square
-coefficient matrix, with multiple right-hand sides.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``getrs`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-  
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-The routine solves for :math:`X` the following systems of linear
-equations:
-
-    .. list-table:: 
-       :header-rows: 1
-    
-       * -     \ :math:`AX = B`\     
-         -     if ``trans``\ =\ ``oneapi::mkl::transpose::nontrans``\     
-       * -     \ :math:`A^TX = B`\     
-         -     if ``trans``\ =\ ``oneapi::mkl::transpose::trans``\     
-       * -     \ :math:`A^HX = B`\     
-         -     if ``trans``\ =\ ``oneapi::mkl::transpose::conjtrans``\     
-
-Before calling this routine, you must call
-:ref:`onemkl_lapack_getrf`
-to compute the LU factorization of :math:`A`.
-
-getrs (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-      
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<std::int64_t,1> &ipiv, sycl::buffer<T,1> &b, std::int64_t ldb, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-trans
-   Indicates the form of the equations:
-
-   If ``trans=oneapi::mkl::transpose::nontrans``, then :math:`AX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, then :math:`A^TX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, then :math:`A^HX = B` is
-   solved for :math:`X`.
-
-n
-   The order of the matrix :math:`A` and the number of rows in matrix
-   :math:`B(0 \le n)`.
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-   Buffer containing the factorization of the matrix :math:`A`, as
-   returned by :ref:`onemkl_lapack_getrf`. The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-ipiv
-   Array, size at least :math:`\max(1, n)`. The ``ipiv`` array, as returned by
-   :ref:`onemkl_lapack_getrf`.
-
-b
-   The array ``b`` contains the matrix :math:`B` whose columns are the
-   right-hand sides for the systems of equations. The second
-   dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_getrs_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-b
-   The buffer ``b`` is overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-getrs (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, std::int64_t lda, std::int64_t *ipiv, T *b, std::int64_t ldb, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-trans
-   Indicates the form of the equations:
-
-   If ``trans=oneapi::mkl::transpose::nontrans``, then :math:`AX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, then :math:`A^TX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, then :math:`A^HX = B` is
-   solved for :math:`X`.
-
-n
-   The order of the matrix :math:`A` and the number of rows in matrix
-   :math:`B(0 \le n)`.
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-   Pointer to array containing the factorization of the matrix :math:`A`, as
-   returned by :ref:`onemkl_lapack_getrf`. The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-ipiv
-   Array, size at least :math:`\max(1, n)`. The ``ipiv`` array, as returned by
-   :ref:`onemkl_lapack_getrf`.
-
-b
-   The array ``b`` contains the matrix :math:`B` whose columns are the
-   right-hand sides for the systems of equations. The second
-   dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_getrs_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-b
-   The array ``b`` is overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-     
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
diff --git a/docs/domains/lapack/getrs_batch.rst b/docs/domains/lapack/getrs_batch.rst
deleted file mode 100644
index 4c23fb04d..000000000
--- a/docs/domains/lapack/getrs_batch.rst
+++ /dev/null
@@ -1,286 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrs_batch:
-
-getrs_batch
-===========
-
-Solves a system of linear equations with a batch of LU-factored square coefficient matrices, with multiple right-hand sides.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getrs_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_getrs_batch_buffer:
-
-getrs_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``getrs_batch`` supports only the strided API. 
-   
-**Strided API**
-
- | The routine solves for the following systems of linear equations :math:`X_i`: 
- | :math:`A_iX_i = B_i`, if ``trans=mkl::transpose::nontrans``
- | :math:`A_i^TX_i = B_i`, if ``trans=mkl::transpose::trans``
- | :math:`A_i^HX_i = B_i`, if ``trans=mkl::transpose::conjtrans``
- | Before calling this routine, the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function should be called to compute the LU factorizations of :math:`A_i`.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void getrs_batch(sycl::queue &queue, mkl::transpose trans, std::int64_t n, std::int64_t nrhs, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t> &ipiv, std::int64_t stride_ipiv, sycl::buffer<T> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-trans
- | Form of the equations:
- | If ``trans = mkl::transpose::nontrans``, then :math:`A_iX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::trans``, then :math:`A_i^TX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::conjtrans``, then :math:`A_i^HX_i = B_i` is solved for :math:`X_i`.
-
-n
-  Order of the matrices :math:`A_i` and the number of rows in matrices :math:`B_i` (:math:`0 \le n`).
-
-nrhs
-  Number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-  Array containing the factorizations of the matrices :math:`A_i`, as returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function.
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-ipiv
-  ``ipiv`` array, as returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_buffer` function.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-b 
-  Array containing the matrices :math:`B_i` whose columns are the right-hand sides for the systems of equations.
-
-ldb
-  Leading dimension of :math:`B_i`.
-
-batch_size
-  Specifies the number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getrs_batch_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b  
-  Solution matrices :math:`X_i`.
-
-.. _onemkl_lapack_getrs_batch_usm:
-
-getrs_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``getrs_batch`` supports the group API and strided API. 
-
-**Group API**
-
- | The routine solves the following systems of linear equations for :math:`X_i` (:math:`i \in \{1...batch\_size\}`):
- | :math:`A_iX_i = B_i`, if ``trans=mkl::transpose::nontrans``
- | :math:`A_i^TX_i = B_i`, if ``trans=mkl::transpose::trans``
- | :math:`A_i^HX_i = B_i`, if ``trans=mkl::transpose::conjtrans``
- | Before calling this routine, call the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function to compute the LU factorizations of :math:`A_i`.
- | Total number of problems to solve, ``batch_size``, is a sum of sizes of all of the groups of parameters as provided by ``group_sizes`` array.
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrs_batch(sycl::queue &queue, mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, T **a, std::int64_t *lda, std::int64_t **ipiv, T **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-trans
- | Array of ``group_count`` parameters :math:`trans_g` indicating the form of the equations for the group :math:`g`:
- | If ``trans = mkl::transpose::nontrans``, then :math:`A_iX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::trans``, then :math:`A_i^TX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::conjtrans``, then :math:`A_i^HX_i = B_i` is solved for :math:`X_i`.
-
-n
-  Array of ``group_count`` parameters :math:`n_g` specifying the order of the matrices :math:`A_i` and the number of rows in matrices :math:`B_i` (:math:`0 \le n_g`) belonging to group :math:`g`.
-
-nrhs
-  Array of ``group_count`` parameters :math:`\text{nrhs}_g` specifying the number of right-hand sides (:math:`0 \le \text{nrhs}_g`) for group :math:`g`.
-
-a
-  Array of ``batch_size`` pointers to factorizations of the matrices :math:`A_i`, as returned by the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-lda
-  Array of ``group_count`` parameters :math:`\text{lda}_g` specifying the leading dimensions of :math:`A_i` from group :math:`g`.
-
-ipiv
-  ``ipiv`` array, as returned by the Group API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-b 
-  The array containing ``batch_size`` pointers to the matrices :math:`B_i` whose columns are the right-hand sides for the systems of equations.
-
-ldb
-  Array of ``group_count`` parameters :math:`\text{ldb}_g` specifying the leading dimensions of :math:`B_i` in the group :math:`g`.
-
-group_count
-  Specifies the number of groups of parameters. Must be at least 0.
-    
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-    
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_getrs_batch_scratchpad_size` function.
-  
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b  
-  Solution matrices :math:`X_i`.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
- | The routine solves the following systems of linear equations for :math:`X_i`:
- | :math:`A_iX_i = B_i`, if ``trans=mkl::transpose::nontrans``
- | :math:`A_i^TX_i = B_i`, if ``trans=mkl::transpose::trans``
- | :math:`A_i^HX_i = B_i`, if ``trans=mkl::transpose::conjtrans``
- | Before calling this routine, the Strided API of the :ref:`onemkl_lapack_getrf_batch` function should be called to compute the LU factorizations of :math:`A_i`.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event getrs_batch(sycl::queue &queue, mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, T *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-trans
- | Form of the equations:
- | If ``trans = mkl::transpose::nontrans``, then :math:`A_iX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::trans``, then :math:`A_i^TX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::conjtrans``, then :math:`A_i^HX_i = B_i` is solved for :math:`X_i`.
-
-n
-  Order of the matrices :math:`A_i` and the number of rows in matrices :math:`B_i` (:math:`0 \le n`).
-
-nrhs
-  Number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-  Array containing the factorizations of the matrices :math:`A_i`, as returned by the Strided API of the :ref:`onemkl_lapack_getrf_batch_usm` function.
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a  
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-ipiv
-  ``ipiv`` array, as returned by getrf_batch (USM) function.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-b
-  Array containing the matrices :math:`B_i` whose columns are the right-hand sides for the systems of equations.
-
-ldb
-  Leading dimensions of :math:`B_i`.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-    
-scratchpad_size 
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_getrs_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b  
-  Solution matrices :math:`X_i`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/getrs_batch_scratchpad_size.rst b/docs/domains/lapack/getrs_batch_scratchpad_size.rst
deleted file mode 100644
index 491524482..000000000
--- a/docs/domains/lapack/getrs_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,135 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrs_batch_scratchpad_size:
-
-getrs_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_getrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getrs_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-trans
- | Array of ``group_count`` parameters :math:`\text{trans}_g` indicating the form of the equations for the group :math:`g`:
- | If ``trans = mkl::transpose::nontrans``, then :math:`A_iX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::trans``, then :math:`A_i^TX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::conjtrans``, then :math:`A_i^HX_i = B_i` is solved for :math:`X_i`.
-
-n
-  Array of ``group_count`` parameters :math:`n_g` specifying the order of the matrices :math:`A_i` and the number of rows in matrices :math:`B_i` (:math:`0 \le n_g`) belonging to group :math:`g`.
-
-nrhs
-  Array of ``group_count`` parameters :math:`\text{nrhs}_g` specifying the number of right-hand sides (:math:`0 \le \text{nrhs}_g`) for group :math:`g`.
-
-lda
-  Array of ``group_count`` parameters :math:`\text{lda}_g` specifying the leading dimensions of :math:`A_i` from group :math:`g`.
-
-ldb
-  Array of ``group_count`` parameters :math:`\text{ldb}_g` specifying the leading dimensions of :math:`B_i` in the group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_getrs_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-trans
- | Indicates the form of the equations:
- | If ``trans = mkl::transpose::nontrans``, then :math:`A_iX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::trans``, then :math:`A_i^TX_i = B_i` is solved for :math:`X_i`.
- | If ``trans = mkl::transpose::conjtrans``, then :math:`A_i^HX_i = B_i` is solved for :math:`X_i`.
-
-n
-  Order of the matrices :math:`A_i` and the number of rows in matrices :math:`B_i` (:math:`0 \le n`).
-
-nrhs
-  Number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_ipiv
-  Stride between the beginnings of arrays :math:`\text{ipiv}_i` inside the array ``ipiv``.
-
-ldb
-  Leading dimension of :math:`B_i`.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_getrs_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/getrs_scratchpad_size.rst b/docs/domains/lapack/getrs_scratchpad_size.rst
deleted file mode 100644
index 8a2741745..000000000
--- a/docs/domains/lapack/getrs_scratchpad_size.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_getrs_scratchpad_size:
-
-getrs_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_getrs` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``getrs_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-      
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getrs` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-getrs_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-      
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_getrs` function will be performed.
-
-trans
-   Indicates the form of the equations:
-
-   If ``trans=oneapi::mkl::transpose::nontrans``, then :math:`AX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, then :math:`A^TX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, then :math:`A^HX = B` is
-   solved for :math:`X`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)` and the number of rows in matrix
-   :math:`B(0 \le n)`.
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-lda
-   The leading dimension of ``a``.
-
-ldb
-   The leading dimension of ``b``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_getrs` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines` 
-
-
diff --git a/docs/domains/lapack/heevd.rst b/docs/domains/lapack/heevd.rst
deleted file mode 100644
index 0e0247dd3..000000000
--- a/docs/domains/lapack/heevd.rst
+++ /dev/null
@@ -1,182 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_heevd:
-
-heevd
-=====
-
-Computes all eigenvalues and, optionally, all eigenvectors of a
-complex Hermitian matrix using divide and conquer algorithm.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``heevd`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine computes all the eigenvalues, and optionally all the
-eigenvectors, of a complex Hermitian matrix :math:`A`. In other words, it
-can compute the spectral factorization of :math:`A` as: :math:`A = Z\Lambda Z^H`.
-
-Here :math:`\Lambda` is a real diagonal matrix whose diagonal elements are the
-eigenvalues :math:`\lambda_i`, and :math:`Z` is the (complex) unitary matrix
-whose columns are the eigenvectors :math:`z_{i}`. Thus,
-
-:math:`Az_i = \lambda_i z_i` for :math:`i = 1, 2, ..., n`.
-
-If the eigenvectors are requested, then this routine uses a divide
-and conquer algorithm to compute eigenvalues and eigenvectors.
-However, if only eigenvalues are required, then it uses the
-Pal-Walker-Kahan variant of the QL or QR algorithm.
-
-heevd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<realT,1> &w, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The buffer ``a``, size (``lda,*``). The buffer ``a`` contains the matrix
-   :math:`A`. The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``. Must be at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_heevd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   If ``jobz = job::vec``, then on exit this buffer is overwritten by
-   the unitary matrix :math:`Z` which contains the eigenvectors of :math:`A`.
-
-w
-   Buffer, size at least n. Contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-heevd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, RealT *w, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to array containing :math:`A`, size (``lda,*``). The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``. Must be at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_heevd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   If ``jobz = job::vec``, then on exit this array is overwritten by
-   the unitary matrix :math:`Z` which contains the eigenvectors of :math:`A`.
-
-w
-   Pointer to array of size at least :math:`n`. Contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/heevd_scratchpad_size.rst b/docs/domains/lapack/heevd_scratchpad_size.rst
deleted file mode 100644
index 4825f73e7..000000000
--- a/docs/domains/lapack/heevd_scratchpad_size.rst
+++ /dev/null
@@ -1,81 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_heevd_scratchpad_size:
-
-heevd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_heevd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``heevd_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_heevd` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-heevd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-      
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_heevd` function will be performed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-      
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_heevd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/hegvd.rst b/docs/domains/lapack/hegvd.rst
deleted file mode 100644
index 266b9964d..000000000
--- a/docs/domains/lapack/hegvd.rst
+++ /dev/null
@@ -1,249 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hegvd:
-
-hegvd
-=====
-
-Computes all eigenvalues and, optionally, eigenvectors of a complex
-generalized Hermitian positive-definite eigenproblem using a divide and
-conquer method.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``hegvd`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-The routine computes all the eigenvalues, and optionally, the
-eigenvectors of a complex generalized Hermitian positive-definite
-eigenproblem, of the form
-
-:math:`Ax = \lambda Bx, ABx = \lambda x`, or :math:`BAx =\lambda x`.
-
-Here :math:`A` and :math:`B` are assumed to be Hermitian and :math:`B` is also
-positive definite.
-
-It uses a divide and conquer algorithm.
-
-hegvd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &b, std::int64_t ldb, sycl::buffer<realT,1> &w, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax = \lambda Bx;`
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x;`
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` (:math:`0 \le n`).
-
-a
-   Buffer, size ``a(lda,*)`` contains the upper or lower triangle of
-   the Hermitian matrix :math:`A`, as specified by upper_lower.
-
-   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1,n)`.
-
-b
-   Buffer, size ``b(ldb,*)`` contains the upper or lower triangle of
-   the Hermitian matrix :math:`B`, as specified by upper_lower.
-
-   The second dimension of ``b`` must be at least :math:`\max(1, n)`.
-
-ldb
-   The leading dimension of ``b``; at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hegvd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   On exit, if ``jobz = job::vec``, then if :math:`\text{info} = 0`, ``a`` contains
-   the matrix :math:`Z` of eigenvectors. The eigenvectors are normalized
-   as follows:
-
-   if :math:`\text{itype} = 1` or :math:`\text{itype} = 2`, :math:`Z^{H}BZ = I`;
-
-   if :math:`\text{itype} = 3`, :math:`Z^{H}B^{-1}Z = I`;
-
-   If ``jobz = job::novec``, then on exit the upper triangle (if
-   ``upper_lower = uplo::upper``) or the lower triangle (if
-   ``upper_lower = uplo::lower``) of :math:`A`, including the diagonal,
-   is destroyed.
-
-b
-   On exit, if :math:`\text{info} \le n`, the part of ``b`` containing the matrix is
-   overwritten by the triangular factor :math:`U` or :math:`L` from the
-   Cholesky factorization :math:`B = U^{H}U` or :math:`B = LL^{H}`.
-
-w
-   Buffer, size at least :math:`n`. If :math:`\text{info} = 0`, contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-hegvd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-      
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *b, std::int64_t ldb, RealT *w, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax = \lambda Bx;`
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x;`
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` (:math:`0 \le n`).
-
-a
-   Pointer to array of size ``a(lda,*)`` containing the upper or lower triangle of
-   the Hermitian matrix :math:`A`, as specified by upper_lower.
-   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1,n)`.
-
-b
-   Pointer to array of size ``b(ldb,*)`` containing the upper or lower triangle of
-   the Hermitian matrix :math:`B`, as specified by upper_lower.
-   The second dimension of ``b`` must be at least :math:`\max(1, n)`.
-
-ldb
-   The leading dimension of ``b``; at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hegvd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit, if ``jobz = job::vec``, then if :math:`\text{info} = 0`, ``a`` contains
-   the matrix :math:`Z` of eigenvectors. The eigenvectors are normalized
-   as follows:
-
-   if :math:`\text{itype} = 1` or :math:`\text{itype} = 2`, :math:`Z^{H}BZ = I`;
-
-   if :math:`\text{itype} = 3`, :math:`Z^{H} B^{-1} Z = I`;
-
-   If ``jobz = job::novec``, then on exit the upper triangle (if
-   ``upper_lower = uplo::upper``) or the lower triangle (if
-   ``upper_lower = uplo::lower``) of :math:`A`, including the diagonal,
-   is destroyed.
-
-b
-   On exit, if :math:`\text{info} \le n`, the part of ``b`` containing the matrix is
-   overwritten by the triangular factor :math:`U` or :math:`L` from the
-   Cholesky factorization :math:`B = U^{H}U` or
-   :math:`B = LL^{H}`.
-
-w
-   Pointer to array of size at least n. If :math:`\text{info} = 0`, contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/hegvd_scratchpad_size.rst b/docs/domains/lapack/hegvd_scratchpad_size.rst
deleted file mode 100644
index 8e33d8b63..000000000
--- a/docs/domains/lapack/hegvd_scratchpad_size.rst
+++ /dev/null
@@ -1,95 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hegvd_scratchpad_size:
-
-hegvd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_hegvd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``hegvd_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hegvd` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-hegvd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda, std::int64_t ldb) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_hegvd` function will be performed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax = \lambda Bx`;
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x`;
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``. Currently ``lda`` is not referenced in
-   this function.
-
-ldb
-   The leading dimension of ``b``. Currently ``ldb`` is not referenced in
-   this function.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hegvd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/hetrd.rst b/docs/domains/lapack/hetrd.rst
deleted file mode 100644
index b8c855d62..000000000
--- a/docs/domains/lapack/hetrd.rst
+++ /dev/null
@@ -1,206 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hetrd:
-
-hetrd
-=====
-
-Reduces a complex Hermitian matrix to tridiagonal form.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``hetrd`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-  
-      * -  Routine name 
-        -  T 
-      * -  ``chetrd`` 
-        -  ``std::complex<float>`` 
-      * -  ``zhetrd`` 
-        -  ``std::complex<double>``
-
-The routine reduces a complex Hermitian matrix :math:`A` to symmetric
-tridiagonal form :math:`T` by a unitary similarity transformation:
-:math:`A = QTQ^H`. The unitary matrix :math:`Q` is not formed explicitly but
-is represented as a product of :math:`n-1` elementary reflectors.
-Routines are provided to work with :math:`Q` in this representation.
-
-hetrd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void hetrd(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<realT,1> &d, sycl::buffer<realT,1> &e, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   Buffer, size ``(lda,*)``. The buffer ``a`` contains either the upper
-   or lower triangle of the Hermitian matrix :math:`A`, as specified by
-   upper_lower.
-
-   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hetrd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit,
-
-   if ``upper_lower = uplo::upper``, the diagonal and first
-   superdiagonal of :math:`A` are overwritten by the corresponding
-   elements of the tridiagonal matrix :math:`T`, and the elements above
-   the first superdiagonal, with the buffer ``tau``, represent the
-   unitary matrix :math:`Q` as a product of elementary reflectors;
-
-   if ``upper_lower = uplo::lower``, the diagonal and first
-   subdiagonal of :math:`A` are overwritten by the corresponding elements
-   of the tridiagonal matrix :math:`T`, and the elements below the first
-   subdiagonal, with the buffer ``tau``, represent the unitary matrix
-   :math:`Q` as a product of elementary reflectors.
-
-d
-   Buffer containing the diagonal elements of the matrix :math:`T`. The
-   dimension of ``d`` must be at least :math:`\max(1, n)`.
-
-e
-   Buffer containing the off diagonal elements of the matrix :math:`T`.
-   The dimension of ``e`` must be at least :math:`\max(1, n-1)`.
-
-tau
-   Buffer, size at least :math:`\max(1, n-1)`. Stores :math:`(n-1)` scalars
-   that define elementary reflectors in decomposition of the unitary
-   matrix :math:`Q` in a product of :math:`n-1` elementary reflectors.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-hetrd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, RealT *d, RealT *e, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   The pointer to matrix :math:`A`, size ``(lda,*)``. Contains either the upper
-   or lower triangle of the Hermitian matrix :math:`A`, as specified by
-   ``upper_lower``.
-   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hetrd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   On exit,
-
-   if ``upper_lower = uplo::upper``, the diagonal and first
-   superdiagonal of :math:`A` are overwritten by the corresponding
-   elements of the tridiagonal matrix :math:`T`, and the elements above
-   the first superdiagonal, with the array ``tau``, represent the
-   unitary matrix :math:`Q` as a product of elementary reflectors;
-
-   if ``upper_lower = uplo::lower``, the diagonal and first
-   subdiagonal of :math:`A` are overwritten by the corresponding elements
-   of the tridiagonal matrix :math:`T`, and the elements below the first
-   subdiagonal, with the array ``tau``, represent the unitary matrix
-   :math:`Q` as a product of elementary reflectors.
-
-d
-   Pointer to diagonal elements of the matrix :math:`T`. The
-   dimension of ``d`` must be at least :math:`\max(1, n)`.
-
-e
-   Pointer to off diagonal elements of the matrix :math:`T`.
-   The dimension of ``e`` must be at least :math:`\max(1, n-1)`.
-
-tau
-   Pointer to array of size at least :math:`\max(1, n-1)`. Stores :math:`(n-1)` scalars
-   that define elementary reflectors in decomposition of the unitary
-   matrix :math:`Q` in a product of :math:`n-1` elementary reflectors.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/hetrd_scratchpad_size.rst b/docs/domains/lapack/hetrd_scratchpad_size.rst
deleted file mode 100644
index 006d50a3c..000000000
--- a/docs/domains/lapack/hetrd_scratchpad_size.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hetrd_scratchpad_size:
-
-hetrd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_hetrd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``hetrd_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hetrd` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-hetrd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_hetrd` function will be performed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``. Currently, ``lda`` is not referenced in
-   this function.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hetrd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/hetrf.rst b/docs/domains/lapack/hetrf.rst
deleted file mode 100644
index 7be2b4bbe..000000000
--- a/docs/domains/lapack/hetrf.rst
+++ /dev/null
@@ -1,164 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hetrf:
-
-hetrf
-=====
-
-Computes the Bunch-Kaufman factorization of a complex Hermitian matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``hetrf`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine computes the factorization of a complex Hermitian
-matrix :math:`A` using the Bunch-Kaufman diagonal pivoting method. The
-form of the factorization is:
-
--  if ``upper_lower=uplo::upper``, :math:`A` = :math:`UDU^{H}`
-
--  if ``upper_lower=uplo::lower``, :math:`A` = :math:`LDL^{H}`
-
-where :math:`A` is the input matrix, :math:`U` and :math:`L` are products of
-permutation and triangular matrices with unit diagonal (upper
-triangular for :math:`U` and lower triangular for :math:`L`), and :math:`D` is a
-Hermitian block-diagonal matrix with :math:`1 \times 1` and :math:`2 \times 2` diagonal
-blocks. :math:`U` and :math:`L` have :math:`2 \times 2` unit diagonal blocks
-corresponding to the :math:`2 \times 2` blocks of :math:`D`.
-
-hetrf (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void hetrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<std::int64_t,1> &ipiv, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of    :math:`A` is stored and how :math:`A` is factored:
-
-      If ``upper_lower=uplo::upper``, the buffer ``a`` stores the upper triangular part of the matrix :math:`A`, and :math:`A` is factored as :math:`UDU^H`.
-
-      If ``upper_lower=uplo::lower``, the buffer ``a`` stores the lower triangular part of the matrix :math:`A`, and :math:`A` is factored as :math:`LDL^H`.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The buffer ``a``, size :math:`\max(1,\text{lda} \cdot n)`. The buffer ``a``    contains either the upper or the lower triangular part of the matrix   :math:`A` (see ``upper_lower``). The second dimension of ``a`` must be at   least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by the routine for storing intermediate results.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hetrf_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   The upper or lower triangular part of a is overwritten by    details of the block-diagonal matrix :math:`D` and the multipliers used   to obtain the factor :math:`U` (or :math:`L`).
-
-ipiv
-   Buffer, size at least :math:`\max(1, n)`. Contains details of    the interchanges and the block structure of :math:`D`. If   :math:`\text{ipiv}(i)=k>0`, then :math:`d_{ii}` is a :math:`1 \times 1` block, and the   :math:`i`-th row and column of :math:`A` was interchanged with the :math:`k`-th   row and column.
-
-      If ``upper_lower=oneapi::mkl::uplo::upper``   and :math:`\text{ipiv}(i)=\text{ipiv}(i-1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i-1`, and (:math:`i-1`)-th row and column of   :math:`A` was interchanged with the :math:`m`-th row and   column.
-
-      If ``upper_lower=oneapi::mkl::uplo::lower`` and   :math:`\text{ipiv}(i)=\text{ipiv}(i+1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i+1`, and (:math:`i+1`)-th row and column   of :math:`A` was interchanged with the :math:`m`-th row and column.
-
-hetrf (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of    :math:`A` is stored and how :math:`A` is factored:
-
-      If   ``upper_lower=uplo::upper``, the array ``a`` stores the upper triangular   part of the matrix :math:`A`, and :math:`A` is factored as :math:`UDU^H`.
-
-      If ``upper_lower=uplo::lower``, the array ``a`` stores   the lower triangular part of the matrix :math:`A`, and :math:`A` is factored   as :math:`LDL^H`.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The pointer to :math:`A`, size :math:`\max(1,\text{lda} \cdot n)`, containing either the upper or the lower triangular part of the matrix   :math:`A` (see ``upper_lower``). The second dimension of ``a`` must be at   least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad
-   Pointer to scratchpad memory to be used by the routine for storing intermediate results.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_hetrf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   The upper or lower triangular part of a is overwritten by    details of the block-diagonal matrix :math:`D` and the multipliers used   to obtain the factor :math:`U` (or :math:`L`).
-
-ipiv
-   Pointer to array of size at least :math:`\max(1, n)`. Contains details of    the interchanges and the block structure of :math:`D`. If   :math:`\text{ipiv}(i)=k>0`, then :math:`d_{ii}` is a :math:`1 \times 1` block, and the   :math:`i`-th row and column of :math:`A` was interchanged with the :math:`k`-th   row and column.
-
-      If ``upper_lower=oneapi::mkl::uplo::upper``   and :math:`\text{ipiv}(i)=\text{ipiv}(i-1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i-1`, and (:math:`i-1`)-th row and column of   :math:`A` was interchanged with the :math:`m`-th row and   column.
-      
-      If ``upper_lower=oneapi::mkl::uplo::lower`` and   :math:`\text{ipiv}(i)=\text{ipiv}(i+1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i+1`, and (:math:`i+1`)-th row and column   of :math:`A` was interchanged with the :math:`m`-th row and column.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/hetrf_scratchpad_size.rst b/docs/domains/lapack/hetrf_scratchpad_size.rst
deleted file mode 100644
index 38447dd6e..000000000
--- a/docs/domains/lapack/hetrf_scratchpad_size.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_hetrf_scratchpad_size:
-
-hetrf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_hetrf` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``hetrf_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hetrf` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-hetrf_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_hetrf` function will be performed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of :math:`A` is
-   stored and how :math:`A` is factored:
-
-   If ``upper_lower=uplo::upper``, the buffer ``a`` stores the
-   upper triangular part of the matrix :math:`A`, and :math:`A` is
-   factored as :math:`UDU^H`.
-
-   If ``upper_lower=uplo::lower``, the buffer ``a`` stores the
-   lower triangular part of the matrix :math:`A`, and :math:`A` is
-   factored as :math:`LDL^H`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_hetrf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/lapack-like-extensions.inc.rst b/docs/domains/lapack/lapack-like-extensions.inc.rst
deleted file mode 100644
index b3378b25b..000000000
--- a/docs/domains/lapack/lapack-like-extensions.inc.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack-like-extensions-routines:
-
-LAPACK-like Extensions Routines
-===============================
-
-
-.. container::
-
-
-   oneAPI Math Kernel Library DPC++ provides additional routines to
-   extend the functionality of the LAPACK routines. These include routines
-   to compute many independent factorizations, linear equation solutions, and similar.
-   The following table lists the LAPACK-like Extensions routine groups.
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Scratchpad Size Routines
-           -     Description     
-         * -     :ref:`onemkl_lapack_geqrf_batch`
-           -     :ref:`onemkl_lapack_geqrf_batch_scratchpad_size`
-           -     Computes the QR factorizations of a batch of general matrices.
-         * -     :ref:`onemkl_lapack_getrf_batch`
-           -     :ref:`onemkl_lapack_getrf_batch_scratchpad_size`
-           -     Computes the LU factorizations of a batch of general matrices.   
-         * -     :ref:`onemkl_lapack_getri_batch`
-           -     :ref:`onemkl_lapack_getri_batch_scratchpad_size`
-           -     Computes the inverses of a batch of LU-factored general matrices.   
-         * -     :ref:`onemkl_lapack_getrs_batch`
-           -     :ref:`onemkl_lapack_getrs_batch_scratchpad_size`
-           -     Solves systems of linear equations with a batch of LU-factored square coefficient matrices, with multiple right-hand sides.    
-         * -     :ref:`onemkl_lapack_orgqr_batch`
-           -     :ref:`onemkl_lapack_orgqr_batch_scratchpad_size`
-           -     Generates the real orthogonal/complex unitary matrix :math:`Q_i` of the QR factorization formed by geqrf_batch.
-         * -     :ref:`onemkl_lapack_potrf_batch`
-           -     :ref:`onemkl_lapack_potrf_batch_scratchpad_size`
-           -     Computes the Cholesky factorization of a batch of symmetric (Hermitian) positive-definite matrices.   
-         * -     :ref:`onemkl_lapack_potrs_batch`
-           -     :ref:`onemkl_lapack_potrs_batch_scratchpad_size`
-           -     Solves systems of linear equations with a batch of Cholesky-factored symmetric (Hermitian) positive-definite coefficient matrices, with multiple right-hand sides.    
-         * -     :ref:`onemkl_lapack_ungqr_batch`
-           -     :ref:`onemkl_lapack_ungqr_batch_scratchpad_size`
-           -     Generates the complex unitary matrix :math:`Q_i` of the QR factorization formed by geqrf_batch.
-
-
-
-.. toctree::
-    :hidden:
-
-    geqrf_batch
-    geqrf_batch_scratchpad_size
-    getrf_batch
-    getrf_batch_scratchpad_size
-    getri_batch
-    getri_batch_scratchpad_size
-    getrs_batch
-    getrs_batch_scratchpad_size
-    orgqr_batch
-    orgqr_batch_scratchpad_size
-    potrf_batch
-    potrf_batch_scratchpad_size
-    potrs_batch
-    potrs_batch_scratchpad_size
-    ungqr_batch
-    ungqr_batch_scratchpad_size
diff --git a/docs/domains/lapack/lapack-linear-equation-routines.inc.rst b/docs/domains/lapack/lapack-linear-equation-routines.inc.rst
deleted file mode 100644
index 6e6c25574..000000000
--- a/docs/domains/lapack/lapack-linear-equation-routines.inc.rst
+++ /dev/null
@@ -1,121 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack-linear-equation-routines:
-
-LAPACK Linear Equation Routines
-===============================
-
-
-.. container::
-
-
-   LAPACK Linear Equation routines are used for factoring a matrix,
-   solving a system of linear equations, solving linear least squares problems,
-   and inverting a matrix. The following table lists the LAPACK Linear Equation
-   routine groups.
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Scratchpad Size Routines
-           -     Description     
-         * -     :ref:`onemkl_lapack_geqrf`
-           -     :ref:`onemkl_lapack_geqrf_scratchpad_size`
-           -     Computes the QR factorization of a general m-by-n matrix.
-         * -     :ref:`onemkl_lapack_gerqf`
-           -     :ref:`onemkl_lapack_gerqf_scratchpad_size`
-           -     Computes the RQ factorization of a general m-by-n matrix.
-         * -     :ref:`onemkl_lapack_getrf`
-           -     :ref:`onemkl_lapack_getrf_scratchpad_size`
-           -     Computes the LU factorization of a general m-by-n matrix.   
-         * -     :ref:`onemkl_lapack_getri`
-           -     :ref:`onemkl_lapack_getri_scratchpad_size`
-           -     Computes the inverse of an LU-factored general matrix.   
-         * -     :ref:`onemkl_lapack_getrs`
-           -     :ref:`onemkl_lapack_getrs_scratchpad_size`
-           -     Solves a system of linear equations with an LU-factored square coefficient matrix, with multiple right-hand sides.    
-         * -     :ref:`onemkl_lapack_hetrf`
-           -     :ref:`onemkl_lapack_hetrf_scratchpad_size`
-           -     Computes the Bunch-Kaufman factorization of a complex Hermitian matrix.
-         * -     :ref:`onemkl_lapack_orgqr`
-           -     :ref:`onemkl_lapack_orgqr_scratchpad_size`
-           -     Generates the real orthogonal matrix :math:`Q` of the QR factorization formed by geqrf.
-         * -     :ref:`onemkl_lapack_ormqr`
-           -     :ref:`onemkl_lapack_ormqr_scratchpad_size`
-           -     Multiplies a real matrix by the orthogonal matrix :math:`Q` of the QR factorization formed by geqrf.
-         * -     :ref:`onemkl_lapack_ormrq`
-           -     :ref:`onemkl_lapack_ormrq_scratchpad_size`
-           -     Multiplies a real matrix by the orthogonal matrix :math:`Q` of the RQ factorization formed by gerqf.
-         * -     :ref:`onemkl_lapack_potrf`
-           -     :ref:`onemkl_lapack_potrf_scratchpad_size`
-           -     Computes the Cholesky factorization of a symmetric (Hermitian) positive-definite matrix.   
-         * -     :ref:`onemkl_lapack_potri`
-           -     :ref:`onemkl_lapack_potri_scratchpad_size`
-           -     Computes the inverse of a Cholesky-factored symmetric (Hermitian) positive-definite matrix.   
-         * -     :ref:`onemkl_lapack_potrs`
-           -     :ref:`onemkl_lapack_potrs_scratchpad_size`
-           -     Solves a system of linear equations with a Cholesky-factored symmetric (Hermitian) positive-definite coefficient matrix, with multiple right-hand sides.    
-         * -     :ref:`onemkl_lapack_sytrf`
-           -     :ref:`onemkl_lapack_sytrf_scratchpad_size`
-           -     Computes the Bunch-Kaufman factorization of a symmetric matrix.   
-         * -     :ref:`onemkl_lapack_trtrs`
-           -     :ref:`onemkl_lapack_trtrs_scratchpad_size`
-           -     Solves a system of linear equations with a triangular coefficient matrix, with multiple right-hand sides.    
-         * -     :ref:`onemkl_lapack_ungqr`
-           -     :ref:`onemkl_lapack_ungqr_scratchpad_size`
-           -     Generates the complex unitary matrix :math:`Q` of the QR factorization formed by geqrf.
-         * -     :ref:`onemkl_lapack_unmqr`
-           -     :ref:`onemkl_lapack_unmqr_scratchpad_size`
-           -     Multiplies a complex matrix by the unitary matrix :math:`Q` of the QR factorization formed by geqrf.
-         * -     :ref:`onemkl_lapack_unmrq`
-           -     :ref:`onemkl_lapack_unmrq_scratchpad_size`
-           -     Multiplies a complex matrix by the unitary matrix :math:`Q` of the RQ factorization formed by gerqf.
-
-
-
-
-
-.. toctree::
-    :hidden:
-
-    geqrf
-    geqrf_scratchpad_size
-    gerqf
-    gerqf_scratchpad_size
-    getrf
-    getrf_scratchpad_size
-    getri
-    getri_scratchpad_size
-    getrs
-    getrs_scratchpad_size
-    hetrf
-    hetrf_scratchpad_size
-    orgqr
-    orgqr_scratchpad_size
-    ormqr
-    ormqr_scratchpad_size
-    ormrq
-    ormrq_scratchpad_size
-    potrf
-    potrf_scratchpad_size
-    potri
-    potri_scratchpad_size
-    potrs
-    potrs_scratchpad_size
-    sytrf
-    sytrf_scratchpad_size
-    trtrs
-    trtrs_scratchpad_size
-    ungqr
-    ungqr_scratchpad_size
-    unmqr
-    unmqr_scratchpad_size
-    unmrq
-    unmrq_scratchpad_size
diff --git a/docs/domains/lapack/lapack-singular-value-eigenvalue-routines.inc.rst b/docs/domains/lapack/lapack-singular-value-eigenvalue-routines.inc.rst
deleted file mode 100644
index bcf8c1af3..000000000
--- a/docs/domains/lapack/lapack-singular-value-eigenvalue-routines.inc.rst
+++ /dev/null
@@ -1,105 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack-singular-value-eigenvalue-routines:
-
-LAPACK Singular Value and Eigenvalue Problem Routines
-=====================================================
-
-
-.. container::
-
-
-   LAPACK Singular Value and Eigenvalue Problem routines are used for
-   singular value and eigenvalue problems, and for performing a number of related
-   computational tasks. The following table lists the LAPACK Singular Value and 
-   Eigenvalue Problem routine groups.
-
-
-   .. container:: tablenoborder
-
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -     Routines
-           -     Scratchpad Size Routines
-           -     Description     
-         * -     :ref:`onemkl_lapack_gebrd`
-           -     :ref:`onemkl_lapack_gebrd_scratchpad_size`
-           -     Reduces a general matrix to bidiagonal form.   
-         * -     :ref:`onemkl_lapack_gesvd`
-           -     :ref:`onemkl_lapack_gesvd_scratchpad_size`
-           -     Computes the singular value decomposition of a general rectangular matrix.
-         * -     :ref:`onemkl_lapack_heevd`
-           -     :ref:`onemkl_lapack_heevd_scratchpad_size`
-           -     Computes all eigenvalues and, optionally, all eigenvectors of a complex Hermitian matrix using divide and conquer algorithm.
-         * -     :ref:`onemkl_lapack_hegvd`
-           -     :ref:`onemkl_lapack_hegvd_scratchpad_size`
-           -     Computes all eigenvalues and, optionally, all eigenvectors of a complex generalized Hermitian definite eigenproblem using divide and conquer algorithm.
-         * -     :ref:`onemkl_lapack_hetrd`
-           -     :ref:`onemkl_lapack_hetrd_scratchpad_size`
-           -     Reduces a complex Hermitian matrix to tridiagonal form.
-         * -     :ref:`onemkl_lapack_orgbr`
-           -     :ref:`onemkl_lapack_orgbr_scratchpad_size`
-           -     Generates the real orthogonal matrix :math:`Q` or :math:`P^T` determined by gebrd.
-         * -     :ref:`onemkl_lapack_orgtr`
-           -     :ref:`onemkl_lapack_orgtr_scratchpad_size`
-           -     Generates the real orthogonal matrix :math:`Q` determined by sytrd.
-         * -     :ref:`onemkl_lapack_ormtr`
-           -     :ref:`onemkl_lapack_ormtr_scratchpad_size`
-           -     Multiplies a real matrix by the orthogonal matrix :math:`Q` determined by sytrd.
-         * -     :ref:`onemkl_lapack_syevd`
-           -     :ref:`onemkl_lapack_syevd_scratchpad_size`
-           -     Computes all eigenvalues and, optionally, all eigenvectors of a real symmetric matrix using divide and conquer algorithm.
-         * -     :ref:`onemkl_lapack_sygvd`
-           -     :ref:`onemkl_lapack_sygvd_scratchpad_size`
-           -     Computes all eigenvalues and, optionally, all eigenvectors of a real generalized symmetric definite eigenproblem using divide and conquer algorithm.
-         * -     :ref:`onemkl_lapack_sytrd`
-           -     :ref:`onemkl_lapack_sytrd_scratchpad_size`
-           -     Reduces a real symmetric matrix to tridiagonal form.
-         * -     :ref:`onemkl_lapack_ungbr`
-           -     :ref:`onemkl_lapack_ungbr_scratchpad_size`
-           -     Generates the complex unitary matrix :math:`Q` or :math:`P^T` determined by gebrd.
-         * -     :ref:`onemkl_lapack_ungtr`
-           -     :ref:`onemkl_lapack_ungtr_scratchpad_size`
-           -     Generates the complex unitary matrix :math:`Q` determined by hetrd.
-         * -     :ref:`onemkl_lapack_unmtr`
-           -     :ref:`onemkl_lapack_unmtr_scratchpad_size`
-           -     Multiplies a complex matrix by the unitary matrix :math:`Q` determined by hetrd.
-
-
-
-
-.. toctree::
-    :hidden:
-
-    gebrd
-    gebrd_scratchpad_size
-    gesvd
-    gesvd_scratchpad_size
-    heevd
-    heevd_scratchpad_size
-    hegvd
-    hegvd_scratchpad_size
-    hetrd
-    hetrd_scratchpad_size
-    orgbr
-    orgbr_scratchpad_size
-    orgtr
-    orgtr_scratchpad_size
-    ormtr
-    ormtr_scratchpad_size
-    syevd
-    syevd_scratchpad_size
-    sygvd
-    sygvd_scratchpad_size
-    sytrd
-    sytrd_scratchpad_size
-    ungbr
-    ungbr_scratchpad_size
-    ungtr
-    ungtr_scratchpad_size
-    unmtr
-    unmtr_scratchpad_size
diff --git a/docs/domains/lapack/lapack.rst b/docs/domains/lapack/lapack.rst
deleted file mode 100644
index bb11e72ed..000000000
--- a/docs/domains/lapack/lapack.rst
+++ /dev/null
@@ -1,43 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack:
-
-LAPACK Routines
-+++++++++++++++
-
-oneMKL provides a DPC++ interface to select routines from the Linear Algebra PACKage (LAPACK), as well as several LAPACK-like extension routines.
-
-.. include:: lapack-linear-equation-routines.inc.rst
-.. include:: lapack-singular-value-eigenvalue-routines.inc.rst
-.. include:: lapack-like-extensions.inc.rst
-
-
-.. container::
-
-   .. container:: Note
-
-
-      .. rubric:: Note
-         :class: NoteTipHead
-
-
-      Different arrays used as parameters to oneMKL LAPACK routines must
-      not overlap.
-
-
-   .. container:: Note
-
-
-      .. rubric:: Warning
-         :name: warning
-         :class: NoteTipHead
-
-
-      LAPACK routines assume that input matrices do not contain IEEE 754
-      special values such as INF or NaN values. Using these special
-      values may cause LAPACK to return unexpected results or become
-      unstable.
-
-**Parent topic:** :ref:`onemkl_dense_linear_algebra`
diff --git a/docs/domains/lapack/orgbr.rst b/docs/domains/lapack/orgbr.rst
deleted file mode 100644
index 6ff70338f..000000000
--- a/docs/domains/lapack/orgbr.rst
+++ /dev/null
@@ -1,226 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgbr:
-
-orgbr
-=====
-
-Generates the real orthogonal matrix :math:`Q` or :math:`P^{T}`
-determined by
-:ref:`onemkl_lapack_gebrd`.
-
-``orgbr`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-.. container:: section
-
-  .. rubric:: Description
-      
-The routine generates the whole or part of the orthogonal matrices
-:math:`Q` and :math:`P^{T}` formed by the routines :ref:`onemkl_lapack_gebrd`.
-All valid combinations of arguments are described in *Input parameters*. In
-most cases you need the following:
-
-To compute the whole :math:`m \times m` matrix :math:`Q`:
-
-::
-
-   orgbr(queue, generate::q, m, m, n, a, ...)
-
-(note that the array ``a`` must have at least :math:`m` columns).
-
-To form the :math:`n` leading columns of :math:`Q` if :math:`m > n`:
-
-::
-
-   orgbr(queue, generate::q, m, n, n, a, ...)
-
-To compute the whole :math:`n \times n` matrix :math:`P^{T}`:
-
-::
-
-   orgbr(queue, generate::p, n, n, m, a, ...)
-
-(note that the array ``a`` must have at least :math:`n` rows).
-
-To form the :math:`m` leading rows of :math:`P^{T}` if :math:`m < n`:
-
-::
-
-   orgbr(queue, generate::p, m, n, m, a, ...)
-
-orgbr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void orgbr(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le n)`. See m for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-tau
-   Buffer, size :math:`\min(m,k)` if ``gen = generate::q``, size
-   :math:`\min(n,k)` if ``gen = generate::p``. Scalar factor of the
-   elementary reflectors, as returned by :ref:`onemkl_lapack_gebrd` in the array tauq
-   or taup.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgbr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by n leading columns of the :math:`m \times m` orthogonal matrix
-   :math:`Q` or :math:`P^{T}` (or the leading rows or columns thereof)
-   as specified by ``gen``, ``m``, and ``n``.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-orgbr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le n)`. See m for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-a
-   Pointer to array ``a`` as returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-tau
-   Pointer to array of size :math:`\min(m,k)` if ``gen = generate::q``, size
-   :math:`\min(n,k)` if ``gen = generate::p``. Scalar factor of the
-   elementary reflectors, as returned by :ref:`onemkl_lapack_gebrd` in the array tauq
-   or taup.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgbr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by n leading columns of the :math:`m \times m` orthogonal matrix
-   :math:`Q` or :math:`P^{T}` (or the leading rows or columns thereof)
-   as specified by ``gen``, ``m``, and ``n``.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/orgbr_scratchpad_size.rst b/docs/domains/lapack/orgbr_scratchpad_size.rst
deleted file mode 100644
index 7e7804158..000000000
--- a/docs/domains/lapack/orgbr_scratchpad_size.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgbr_scratchpad_size:
-
-orgbr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_orgbr` function.
-
-``orgbr_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-.. container:: section
-
-  .. rubric:: Description
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgbr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-orgbr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t &scratchpad_size) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_orgbr` function will be performed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix
-   :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le n)`. See ``m`` for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgbr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines` 
-
-
diff --git a/docs/domains/lapack/orgqr.rst b/docs/domains/lapack/orgqr.rst
deleted file mode 100644
index 532e7fd9c..000000000
--- a/docs/domains/lapack/orgqr.rst
+++ /dev/null
@@ -1,183 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgqr:
-
-orgqr
-=====
-
-Generates the real orthogonal matrix :math:`Q` of the QR factorization formed
-by :ref:`onemkl_lapack_geqrf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``orgqr`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-The routine generates the whole or part of :math:`m \times m` orthogonal
-matrix :math:`Q` of the QR factorization formed by the routine
-:ref:`onemkl_lapack_geqrf`.
-
-Usually :math:`Q` is determined from the QR factorization of an ``m``
-by ``p`` matrix :math:`A` with :math:`m \ge p`. To compute the whole matrix
-:math:`Q`, use:
-
-::
-
-   oneapi::mkl::lapack::orgqr(queue, m, m, p, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the leading :math:`p` columns of :math:`Q` (which form an
-orthonormal basis in the space spanned by the columns of :math:`A`):
-
-::
-
-   oneapi::mkl::lapack::orgqr(queue, m, p, p, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the matrix :math:`Q^{k}` of the QR factorization of
-leading :math:`k` columns of the matrix :math:`A`:
-
-::
-
-   oneapi::mkl::lapack::orgqr(queue, m, m, k, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the leading :math:`k` columns of :math:`Q^{k}` (which form
-an orthonormal basis in the space spanned by leading :math:`k` columns of
-the matrix :math:`A`):
-
-::
-
-   oneapi::mkl::lapack::orgqr(queue, m, k, k, a, lda, tau, scratchpad, scratchpad_size)
-
-orgqr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-lda
-   The leading dimension of ``a`` (:math:`\text{lda} \ge m`).
-
-tau
-   The buffer ``tau`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgqr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m` orthogonal matrix
-   :math:`Q`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-orgqr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-lda
-   The leading dimension of ``a`` (:math:`\text{lda} \ge m`).
-
-tau
-   The pointer to ``tau`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgqr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m` orthogonal matrix
-   :math:`Q`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/orgqr_batch.rst b/docs/domains/lapack/orgqr_batch.rst
deleted file mode 100644
index 6984ebead..000000000
--- a/docs/domains/lapack/orgqr_batch.rst
+++ /dev/null
@@ -1,262 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgqr_batch:
-
-orgqr_batch
-===========
-
-Generates the orthogonal/unitary matrix :math:`Q_i` of the QR factorizations for a group of general matrices.
-
-.. rubric:: Description
-
-``orgqr_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-.. _onemkl_lapack_orgqr_batch_buffer:
-
-orgqr_batch (Buffer Version)
-----------------------------
-
-.. rubric:: Description
-
-The buffer version of ``orgqr_batch`` supports only the strided API. 
-   
-**Strided API**
-
- | The routine generates the wholes or parts of :math:`m \times n` orthogonal matrices :math:`Q_i` of the batch of QR factorizations formed by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``orgqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``orgqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of leading :math:`k` columns of the matrices :math:`A_i`:
- | ``orgqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``orgqr_batch(queue, m, k, k, a, ...)``
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<T> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices :math:`A_i` (:math:`0 \le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-a
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`\text{lda} \le m`).
-
-stride_a
-  The stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-tau
-  Array resulting from call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Specifies the number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by the Strided API of the :ref:`onemkl_lapack_orgqr_batch_scratchpad_size` function.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  Batch of :math:`n` leading columns of the :math:`m \times m` orthogonal matrices :math:`Q_i`.
-
-.. _onemkl_lapack_orgqr_batch_usm:
-
-orgqr_batch (USM Version)
--------------------------
-
-.. rubric:: Description
-
-The USM version of ``orgqr_batch`` supports the group API and strided API. 
-
-**Group API**
-
- | The routine generates the wholes or parts of :math:`m \times n` orthogonal matrices :math:`Q_i` of the batch of QR factorizations formed by the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``orgqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``orgqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of leading :math:`k` columns of the matrices :math:`A_i`:
- | ``orgqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``orgqr_batch(queue, m, k, k, a, ...)``
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, std::int64_t *lda, T **tau, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` :math:`m_g` parameters as previously supplied to group version of geqrf_batch function.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters as previously supplied to group version of geqrf_batch function.
-
-k
-  Array of ``group_count`` :math:`k_g` parameters as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function. The number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k_g \le n_g`).
-
-a
-  Array resulting after call to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-lda
-  Array of leading dimensions of :math:`A_i` as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-tau
-  Array resulting after call to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by Group API of the :ref:`onemkl_lapack_orgqr_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  :math:`n_g` leading columns of the :math:`m_g \times m_g` orthogonal matrices :math:`Q_i`, where :math:`g` is an index of group of parameters corresponding to :math:`Q_i`.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
- | The routine generates the wholes or parts of :math:`m \times n` orthogonal matrices :math:`Q_i` of the batch of QR factorizations formed by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``orgqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``orgqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of leading :math:`k` columns of the matrices :math:`A_i`:
- | ``orgqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``orgqr_batch(queue, m, k, k, a, ...)``
-
-.. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, std::int64_t stride_a, T *tau, std::int64_t stride_tau, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-   .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices :math:`A_i` (:math:`0 \le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-a
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`\text{lda} \le m`).
-
-stride_a
-  The stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-tau
-  Array resulting from call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Specifies the number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by the Strided API of the :ref:`onemkl_lapack_orgqr_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-   .. rubric:: Output Parameters
-
-a
-  Batch of :math:`n` leading columns of the :math:`m \times m` orthogonal matrices :math:`Q_i`.
-
-.. container:: section
-   
-   .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/orgqr_batch_scratchpad_size.rst b/docs/domains/lapack/orgqr_batch_scratchpad_size.rst
deleted file mode 100644
index 444075609..000000000
--- a/docs/domains/lapack/orgqr_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,121 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgqr_batch_scratchpad_size:
-
-orgqr_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_orgqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``orgqr_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should able to hold to be passed to the Group API of the :ref:`onemkl_lapack_orgqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` :math:`m_g` parameters.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters.
-
-k
-  Array of ``group_count`` kg parameters. The number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k_g \le n_g`).
-
-lda
-  Array of leading dimensions of :math:`A_i`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should able to hold to be passed to the Group API of the :ref:`onemkl_lapack_orgqr_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_orgqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices Ai (:math:`0 \le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-lda
-  Leading dimension of :math:`A_i` (:math:`\text{lda} \le m`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_tau  
-  Stride between the beginnings of arrays :math:`tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_orgqr_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/orgqr_scratchpad_size.rst b/docs/domains/lapack/orgqr_scratchpad_size.rst
deleted file mode 100644
index 4ca2f4d6c..000000000
--- a/docs/domains/lapack/orgqr_scratchpad_size.rst
+++ /dev/null
@@ -1,70 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgqr_scratchpad_size:
-
-orgqr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_orgqr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``orgqr_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgqr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-orgqr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_orgqr` function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgqr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/orgtr.rst b/docs/domains/lapack/orgtr.rst
deleted file mode 100644
index 757e6b9ae..000000000
--- a/docs/domains/lapack/orgtr.rst
+++ /dev/null
@@ -1,148 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgtr:
-
-orgtr
-=====
-
-Generates the real orthogonal matrix :math:`Q` determined by
-:ref:`onemkl_lapack_sytrd`.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``orgtr`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-The routine explicitly generates the :math:`n \times n` orthogonal matrix
-:math:`Q` formed by :ref:`onemkl_lapack_sytrd` when
-reducing a real symmetric matrix :math:`A` to tridiagonal form:
-:math:`A = QTQ^T`. Use this routine after a call to
-:ref:`onemkl_lapack_sytrd`.
-
-orgtr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void orgtr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to :ref:`onemkl_lapack_sytrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-a
-   The buffer ``a`` as returned by :ref:`onemkl_lapack_sytrd`. The
-   second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-tau
-   The buffer ``tau`` as returned by :ref:`onemkl_lapack_sytrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, n-1)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgtr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the orthogonal matrix :math:`Q`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-orgtr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied
-   to :ref:`onemkl_lapack_sytrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_sytrd`. The
-   second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-tau
-   The pointer to ``tau`` as returned by :ref:`onemkl_lapack_sytrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, n-1)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_orgtr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   Overwritten by the orthogonal matrix :math:`Q`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/orgtr_scratchpad_size.rst b/docs/domains/lapack/orgtr_scratchpad_size.rst
deleted file mode 100644
index aee5516c6..000000000
--- a/docs/domains/lapack/orgtr_scratchpad_size.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_orgtr_scratchpad_size:
-
-orgtr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_orgtr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``orgtr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgtr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-orgtr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_orgtr` function will be performed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to :ref:`onemkl_lapack_sytrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_orgtr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/ormqr.rst b/docs/domains/lapack/ormqr.rst
deleted file mode 100644
index 320bfe69d..000000000
--- a/docs/domains/lapack/ormqr.rst
+++ /dev/null
@@ -1,207 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormqr:
-
-ormqr
-=====
-
-Multiplies a real matrix by the orthogonal matrix :math:`Q` of the QR
-factorization formed by :ref:`onemkl_lapack_geqrf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ormqr`` supports the following precisions.
-
-    .. list-table::
-       :header-rows: 1
-
-       * -  T
-       * -  ``float``
-       * -  ``double``
-
-The routine multiplies a rectangular real :math:`m \times n` matrix :math:`C` by
-:math:`Q` or :math:`Q^T`, where :math:`Q` is the complex unitary matrix defined
-as a product of :math:`k` elementary reflectors :math:`H(i)` of order :math:`n`:
-:math:`Q = H(1)^TH(2)^T ... H(k)^T` as returned by the RQ factorization routine
-:ref:`onemkl_lapack_gerqf`.
-
-Depending on the parameters ``side`` and ``trans``, the routine can form one of
-the matrix products :math:`QC`, :math:`Q^TC`, :math:`CQ`, or :math:`CQ^T`
-(overwriting the result over :math:`C`).
-
-ormqr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{T}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{T}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::trans``, the routine multiplies :math:`C`
-    by :math:`Q^{T}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q` 
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The buffer ``a`` as returned by :ref:`onemkl_lapack_geqrf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The buffer ``tau`` as returned by :ref:`onemkl_lapack_geqrf`.
-
-c
-    The buffer ``c`` contains the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by the
-    :ref:`onemkl_lapack_ormqr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{T}C`, :math:`CQ`, or
-    :math:`CQ^{T}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ormqr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{T}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{T}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::trans``, the routine multiplies :math:`C`
-    by :math:`Q^{T}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q`
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The pointer to ``a`` as returned by :ref:`onemkl_lapack_geqrf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The pointer to ``tau`` as returned by :ref:`onemkl_lapack_geqrf`.
-
-c
-    The pointer ``c`` points to the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by the
-    :ref:`onemkl_lapack_ormqr_scratchpad_size` function.
-
-events
-    List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{T}C`, :math:`CQ`, or
-    :math:`CQ^{T}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
diff --git a/docs/domains/lapack/ormqr_scratchpad_size.rst b/docs/domains/lapack/ormqr_scratchpad_size.rst
deleted file mode 100644
index 66cd996d8..000000000
--- a/docs/domains/lapack/ormqr_scratchpad_size.rst
+++ /dev/null
@@ -1,87 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormqr_scratchpad_size:
-
-ormqr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ormqr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ormqr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormqr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-ormqr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc, std::int64_t &scratchpad_size) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_ormqr` function will be performed.
-
-side
-   If ``side=oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the left.
-
-   If ``side=oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the right.
-
-trans
-   If ``trans=oneapi::mkl::transpose::nontrans``, the routine multiplies
-   :math:`C` by :math:`Q`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, the routine multiplies
-   :math:`C` by :math:`Q^{T}`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-ldc
-   The leading dimension of ``c``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormqr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines` 
-
-
diff --git a/docs/domains/lapack/ormrq.rst b/docs/domains/lapack/ormrq.rst
deleted file mode 100644
index d49f0e3dc..000000000
--- a/docs/domains/lapack/ormrq.rst
+++ /dev/null
@@ -1,208 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormrq:
-
-ormrq
-=====
-
-Multiplies a real matrix by the orthogonal matrix :math:`Q` of the RQ
-factorization formed by :ref:`onemkl_lapack_gerqf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ormrq`` supports the following precisions.
-
-    .. list-table::
-       :header-rows: 1
-
-       * -  T
-       * -  ``float``
-       * -  ``double``
-
-The routine multiplies a rectangular real :math:`m \times n` matrix :math:`C` by
-:math:`Q` or :math:`Q^T`, where :math:`Q` is the complex unitary matrix defined
-as a product of :math:`k` elementary reflectors :math:`H(i)` of order :math:`n`:
-:math:`Q = H(1)^TH(2)^T ... H(k)^T` as returned by the RQ factorization routine
-:ref:`onemkl_lapack_gerqf`.
-
-Depending on the parameters ``side`` and ``trans``, the routine can form one of
-the matrix products :math:`QC`, :math:`Q^TC`, :math:`CQ`, or :math:`CQ^T`
-(overwriting the result over :math:`C`).
-
-ormrq (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{T}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{T}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::trans``, the routine multiplies :math:`C`
-    by :math:`Q^{T}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q`
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The buffer ``a`` as returned by :ref:`onemkl_lapack_gerqf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The buffer ``tau`` as returned by :ref:`onemkl_lapack_gerqf`.
-
-c
-    The buffer ``c`` contains the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by the
-    :ref:`onemkl_lapack_ormrq_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{T}C`, :math:`CQ`, or
-    :math:`CQ^{T}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ormrq (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{T}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{T}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::trans``, the routine multiplies :math:`C`
-    by :math:`Q^{T}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q`
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The pointer to ``a`` as returned by :ref:`onemkl_lapack_gerqf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The pointer to ``tau`` as returned by :ref:`onemkl_lapack_gerqf`.
-
-c
-    The pointer ``c`` points to the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by the
-    :ref:`onemkl_lapack_ormrq_scratchpad_size` function.
-
-events
-    List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{T}C`, :math:`CQ`, or
-    :math:`CQ^{T}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/ormrq_scratchpad_size.rst b/docs/domains/lapack/ormrq_scratchpad_size.rst
deleted file mode 100644
index 6699947de..000000000
--- a/docs/domains/lapack/ormrq_scratchpad_size.rst
+++ /dev/null
@@ -1,81 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormrq_scratchpad_size:
-
-ormrq_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ormrq` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``ormrq_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormrq` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-ormrq_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by the ormrq function will be performed.
-
-side
-   If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^T` is applied to :math:`C` from the left. 
-   
-   If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^T` is applied to :math:`C` from the right.
-
-trans
-   If ``trans=oneapi::mkl::transpose::nontrans``, the routine multiplies :math:`C` by :math:`Q`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, the routine multiplies :math:`C` by :math:`Q^T`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-ldc
-   The leading dimension of ``c``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormrq` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/ormtr.rst b/docs/domains/lapack/ormtr.rst
deleted file mode 100644
index d7413a1fe..000000000
--- a/docs/domains/lapack/ormtr.rst
+++ /dev/null
@@ -1,230 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormtr:
-
-ormtr
-=====
-
-Multiplies a real matrix by the real orthogonal matrix :math:`Q` determined by
-:ref:`onemkl_lapack_sytrd`.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``ormtr`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-The routine multiplies a real matrix :math:`C` by :math:`Q` or :math:`Q^{T}`, 
-where :math:`Q` is the orthogonal matrix :math:`Q` formed by:ref:`onemkl_lapack_sytrd` 
-when reducing a real symmetric matrix :math:`A` to tridiagonal form:
-:math:`A = QTQ^T`. Use this routine after a call to :ref:`onemkl_lapack_sytrd`.
-
-Depending on the parameters side and trans, the routine can
-form one of the matrix products :math:`QC`, :math:`Q^TC`, :math:`CQ`, or
-:math:`CQ^T` (overwriting the result on :math:`C`).
-
-ormtr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-In the descriptions below, ``r`` denotes the order of :math:`Q`:
-
-.. container:: tablenoborder
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  :math:`r = m` 
-          -  if ``side = side::left`` 
-        * -  :math:`r = n` 
-          -  if ``side = side::right`` 
-
-queue
-   The queue where the routine should be executed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side = side::left``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the left.
-
-   If ``side = side::right``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_sytrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or ``transpose::trans``.
-
-   If ``trans = transpose::nontrans``, the routine multiplies :math:`C`
-   by :math:`Q`.
-
-   If ``trans = transpose::trans``, the routine multiplies :math:`C` by
-   :math:`Q^{T}`.
-
-m
-   The number of rows in the matrix :math:`C` :math:`(m \ge 0)`.
-
-n
-   The number of columns in the matrix :math:`C` :math:`(n \ge 0)`.
-
-a
-   The buffer ``a`` as returned by   :ref:`onemkl_lapack_sytrd`.
-
-lda
-   The leading dimension of ``a`` :math:`(\max(1, r) \le \text{lda})`.
-
-tau
-   The buffer ``tau`` as returned bya   :ref:`onemkl_lapack_sytrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, r-1)`.
-
-c
-   The buffer ``c`` contains the matrix :math:`C`. The second dimension of ``c``
-   must be at least :math:`\max(1, n)`.
-
-ldc
-   The leading dimension of ``c`` :math:`(\max(1, n) \le \text{ldc})`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ormtr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-   Overwritten by the product :math:`QC`, :math:`Q^TC`, :math:`CQ`, or :math:`CQ^T`
-   (as specified by ``side`` and ``trans``).
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ormtr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-In the descriptions below, ``r`` denotes the order of :math:`Q`:
-
-.. container:: tablenoborder
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  :math:`r = m` 
-          -  if ``side = side::left`` 
-        * -  :math:`r = n` 
-          -  if ``side = side::right`` 
-
-queue
-   The queue where the routine should be executed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side = side::left``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the left.
-
-   If ``side = side::right``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to   :ref:`onemkl_lapack_sytrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or ``transpose::trans``.
-
-   If ``trans = transpose::nontrans``, the routine multiplies :math:`C`
-   by :math:`Q`.
-
-   If ``trans = transpose::trans``, the routine multiplies :math:`C` by
-   :math:`Q^{T}`.
-
-m
-   The number of rows in the matrix :math:`C` :math:`(m \ge 0)`.
-
-n
-   The number of columns in the matrix :math:`C` :math:`(n \ge 0)`.
-
-a
-   The pointer to ``a`` as returned by   :ref:`onemkl_lapack_sytrd`.
-
-lda
-   The leading dimension of ``a`` :math:`(\max(1, r) \le \text{lda})`.
-
-tau
-   The buffer ``tau`` as returned by   :ref:`onemkl_lapack_sytrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, r-1)`.
-
-c
-   The pointer to memory containing the matrix :math:`C`. The second dimension of ``c``
-   must be at least :math:`\max(1, n)`.
-
-ldc
-   The leading dimension of ``c`` :math:`(\max(1, n) \le \text{ldc})`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ormtr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-   Overwritten by the product :math:`QC`, :math:`Q^TC`, :math:`CQ`, or :math:`CQ^T`
-   (as specified by ``side`` and ``trans``).
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/ormtr_scratchpad_size.rst b/docs/domains/lapack/ormtr_scratchpad_size.rst
deleted file mode 100644
index a71506b8e..000000000
--- a/docs/domains/lapack/ormtr_scratchpad_size.rst
+++ /dev/null
@@ -1,105 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ormtr_scratchpad_size:
-
-ormtr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ormtr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ormtr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormtr` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-ormtr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-In the descriptions below, ``r`` denotes the order of :math:`Q`:
-
-.. container:: tablenoborder
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  :math:`r = m` 
-          -  if ``side = side::left`` 
-        * -  :math:`r = n` 
-          -  if ``side = side::right`` 
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_ormtr` function will be performed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side = side::left``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the left.
-
-   If ``side = side::right``, :math:`Q` or :math:`Q^{T}` is
-   applied to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the
-   same ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_sytrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or ``transpose::trans``.
-
-   If ``trans = transpose::nontrans``, the routine multiplies
-   :math:`C` by :math:`Q`.
-
-   If ``trans = transpose::trans``, the routine multiplies :math:`C`
-   by :math:`Q^{T}`.
-
-m
-   The number of rows in the matrix :math:`C` :math:`(m \ge 0)`.
-
-n
-   The number of rows in the matrix :math:`C` :math:`(n \ge 0)`.
-
-lda
-   The leading dimension of ``a`` :math:`(\max(1, r) \le \text{lda})`.
-
-ldc
-   The leading dimension of ``c`` :math:`(\max(1, n) \le \text{ldc})`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ormtr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/potrf.rst b/docs/domains/lapack/potrf.rst
deleted file mode 100644
index dd044e1f2..000000000
--- a/docs/domains/lapack/potrf.rst
+++ /dev/null
@@ -1,172 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrf:
-
-potrf
-=====
-
-Computes the Cholesky factorization of a symmetric (Hermitian)
-positive-definite matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``potrf`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine forms the Cholesky factorization of a symmetric
-positive-definite or, for complex data, Hermitian positive-definite
-matrix :math:`A`:
-
-    .. list-table:: 
-       :header-rows: 1
- 
-       * -  :math:`A` = :math:`U^{T}U` for real data, :math:`A = U^{H}U` for complex data
-         -  if upper_lower=\ ``oneapi::mkl::uplo::upper`` 
-       * -  :math:`A` = :math:`LL^{T}` for real data, :math:`A = LL^{H}` for complex data
-         -  if upper_lower=\ ``oneapi::mkl::uplo::lower`` 
-
-where :math:`L` is a lower triangular matrix and :math:`U` is upper
-triangular.
-
-potrf (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void potrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of :math:`A` is
-   stored and how :math:`A` is factored:
-
-   If upper_lower=\ ``oneapi::mkl::uplo::upper``, the array ``a`` stores the
-   upper triangular part of the matrix :math:`A`, and the strictly lower
-   triangular part of the matrix is not referenced.
-
-   If upper_lower=\ ``oneapi::mkl::uplo::lower``, the array ``a`` stores the
-   lower triangular part of the matrix :math:`A`, and the strictly upper
-   triangular part of the matrix is not referenced.
-
-n
-   Specifies the order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   Buffer holding input matrix :math:`A`. The buffer ``a`` contains either
-   the upper or the lower triangular part of the matrix :math:`A` (see
-   upper_lower). The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potrf_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   The buffer ``a`` is overwritten by the Cholesky factor :math:`U` or :math:`L`,
-   as specified by ``upper_lower``.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-potrf (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of :math:`A` is
-   stored and how :math:`A` is factored:
-
-   If upper_lower=\ ``oneapi::mkl::uplo::upper``, the array ``a`` stores the
-   upper triangular part of the matrix :math:`A`, and the strictly lower
-   triangular part of the matrix is not referenced.
-
-   If upper_lower=\ ``oneapi::mkl::uplo::lower``, the array ``a`` stores the
-   lower triangular part of the matrix :math:`A`, and the strictly upper
-   triangular part of the matrix is not referenced.
-
-n
-   Specifies the order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to input matrix :math:`A`. The array ``a`` contains either
-   the upper or the lower triangular part of the matrix :math:`A` (see
-   upper_lower). The second dimension of ``a`` must be at least
-   :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potrf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   The memory pointer to by pointer ``a`` is overwritten by the Cholesky factor :math:`U` or :math:`L`,
-   as specified by ``upper_lower``.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/potrf_batch.rst b/docs/domains/lapack/potrf_batch.rst
deleted file mode 100644
index 872886cd7..000000000
--- a/docs/domains/lapack/potrf_batch.rst
+++ /dev/null
@@ -1,239 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrf_batch:
-
-potrf_batch
-===========
-
-Computes the LU factorizations of a batch of general matrices.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``potrf_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_potrf_batch_buffer:
-
-potrf_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``potrf_batch`` supports only the strided API. 
-   
-**Strided API**
-
- | The routine forms the Cholesky factorizations of a symmetric positive-definite or, for complex data, Hermitian positive-definite matrices :math:`A_i`, :math:`i \in \{1...batch\_size\}`:
- | :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data if ``uplo = mkl::uplo::upper``,
- | :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data if ``uplo = mkl::uplo::lower``,
- | where :math:`L_i` is a lower triangular matrix and :math:`U_i` is upper triangular.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void potrf_batch(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
-   | Indicates whether the upper or lower triangular part of :math:`A_i` is stored and how :math:`A_i` is factored:
-   | If ``uplo = mkl::uplo::upper``, the array ``a`` stores the upper triangular parts of the matrices :math:`A_i`,
-   | If ``uplo = mkl::uplo::lower``, the array ``a`` stores the lower triangular parts of the matrices :math:`A_i`.
-
-n
-  Order of the matrices :math:`A_i`, (:math:`0 \le n`).
-
-a
-  Array containing batch of input matrices :math:`A_i`, each of :math:`A_i` being of size :math:`\text{lda} \cdot n` and holding either upper or lower triangular parts of the matrices :math:`A_i` (see ``uplo``).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by the Strided API of the :ref:`onemkl_lapack_potrf_batch_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-	Cholesky factors :math:`U_i` or :math:`L_i`, as specified by ``uplo``.
-
-.. _onemkl_lapack_potrf_batch_usm:
-
-potrf_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``potrf_batch`` supports the group API and strided API. 
-
-**Group API**
-
- | The routine forms the Cholesky factorizations of symmetric positive-definite or, for complex data, Hermitian positive-definite matrices :math:`A_i`, :math:`i \in \{1...batch\_size\}`:
- | :math:`A_i = U_i^TU_i` for real data (:math:`A_i = U_i^HU_i` for complex), if :math:`\text{uplo}_g` is ``mkl::uplo::upper``,
- | :math:`A_i = L_iL_i^T` for real data (:math:`A_i = L_iL_i^H` for complex), if :math:`\text{uplo}_g` is ``mkl::uplo::lower``,
- | where :math:`L_i` is a lower triangular matrix and :math:`U_i` is upper triangular, :math:`g` is an index of group of parameters corresponding to :math:`A_i`, and total number of problems to solve, ``batch_size``, is a sum of sizes of all of the groups of parameters as provided by ``group_sizes`` array
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrf_batch(sycl::queue &queue, mkl::uplo *uplo, std::int64_t *n, T **a, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
-  | Array of ``group_count`` :math:`\text{uplo}_g` parameters. Each :math:`\text{uplo}_g` indicates whether the upper or lower triangular parts of the input matrices are provided:
-  | If :math:`\text{uplo}_g` is ``mkl::uplo::upper``, input matrices from array ``a`` belonging to group :math:`g` store the upper triangular parts,
-  | If :math:`\text{uplo}_g` is ``mkl::uplo::lower``, input matrices from array ``a`` belonging to group :math:`g` store the lower triangular parts.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters. Each :math:`n_g` specifies the order of the input matrices from array a belonging to group :math:`g`.
-
-a
-  Array of ``batch_size`` pointers to input matrices :math:`A_i`, each being of size :math:`\text{lda}_g \cdot n_g` (:math:`g` is an index of group to which :math:`A_i` belongs to) and holding either upper or lower triangular part as specified by :math:`\text{uplo}_g`.
-
-lda
-  Array of ``group_count`` :math:`\text{lda}_g` parameters. Each :math:`\text{lda}_g` specifies the leading dimensions of the matrices from a belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of group_count integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by the Group API of the :ref:`onemkl_lapack_potrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-	Cholesky factors :math:`U_i` or :math:`L_i`, as specified by :math:`\text{uplo}_g` from corresponding group of parameters.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
- | The routine forms the Cholesky factorizations of a symmetric positive-definite or, for complex data, Hermitian positive-definite matrices :math:`A_i`, :math:`i \in \{1...batch\_size\}`:
- | :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data if ``uplo = mkl::uplo::upper``,
- | :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data if ``uplo = mkl::uplo::lower``,
- | where :math:`L_i` is a lower triangular matrix and :math:`U_i` is upper triangular.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrf_batch(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
-   | Indicates whether the upper or lower triangular part of :math:`A_i` is stored and how :math:`A_i` is factored:
-   | If ``uplo = mkl::uplo::upper``, the array ``a`` stores the upper triangular parts of the matrices :math:`A_i`,
-   | If ``uplo = mkl::uplo::lower``, the array ``a`` stores the lower triangular parts of the matrices :math:`A_i`.
-
-n
-  Order of the matrices :math:`A_i`, (:math:`0 \le n`).
-
-a
-  Array containing batch of input matrices :math:`A_i`, each of :math:`A_i` being of size :math:`\text{lda} \cdot n` and holding either upper or lower triangular parts of the matrices :math:`A_i` (see ``uplo``).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less then the value returned by the Strided API of the :ref:`onemkl_lapack_potrf_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-	Cholesky factors :math:`U_i` or :math:`L_i`, as specified by ``uplo``.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/potrf_batch_scratchpad_size.rst b/docs/domains/lapack/potrf_batch_scratchpad_size.rst
deleted file mode 100644
index 4a1b57bf8..000000000
--- a/docs/domains/lapack/potrf_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,120 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrf_batch_scratchpad_size:
-
-potrf_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_potrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``potrf_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_potrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
- | Array of ``group_count`` :math:`\text{uplo}_g` parameters.
- | Each of :math:`\text{uplo}_g` indicates whether the upper or lower triangular parts of the input matrices are provided:
- | If :math:`\text{uplo}_g` is ``mkl::uplo::upper``, input matrices from array ``a`` belonging to group :math:`g` store the upper triangular parts,
- | If :math:`\text{uplo}_g` is ``mkl::uplo::lower``, input matrices from array ``a`` belonging to group :math:`g` store the lower triangular parts.
-
-n
- | Array of ``group_count`` :math:`n_g` parameters.
- | Each :math:`n_g` specifies the order of the input matrices belonging to group :math:`g`.
-
-lda
- | Array of ``group_count`` :math:`\text{lda}_g` parameters.
- | Each :math:`\text{lda}_g` specifies the leading dimensions of the matrices belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes 
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_potrf_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_potrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-uplo
- | Indicates whether the upper or lower triangular part of :math:`A_i` is stored and how :math:`A_i` is factored:
- | If ``uplo = mkl::uplo::upper``, the array ``a`` stores the upper triangular parts of the matrices :math:`A_i`,
- | If ``uplo = mkl::uplo::lower``, the array ``a`` stores the lower triangular parts of the matrices :math:`A_i`.
-
-n
-  Order of the matrices :math:`A_i`, (:math:`0 \le n`).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_potrf_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/potrf_scratchpad_size.rst b/docs/domains/lapack/potrf_scratchpad_size.rst
deleted file mode 100644
index c1e423fa2..000000000
--- a/docs/domains/lapack/potrf_scratchpad_size.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrf_scratchpad_size:
-
-potrf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_potrf` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``potrf_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potrf` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-potrf_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_potrf` function will be performed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of :math:`A` is
-   stored and how :math:`A` is factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the array ``a`` stores the
-   upper triangular part of the matrix :math:`A`, and the strictly lower
-   triangular part of the matrix is not referenced.
-
-   If ``upper_lower = oneapi::mkl::uplo::lower``, the array ``a`` stores the
-   lower triangular part of the matrix :math:`A`, and the strictly upper
-   triangular part of the matrix is not referenced.
-
-n
-   Specifies the order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potrf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/potri.rst b/docs/domains/lapack/potri.rst
deleted file mode 100644
index 3e8de09d7..000000000
--- a/docs/domains/lapack/potri.rst
+++ /dev/null
@@ -1,144 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potri:
-
-potri
-=====
-
-Computes the inverse of a symmetric (Hermitian) positive-definite
-matrix using the Cholesky factorization.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``potri`` supports the following precisions.
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -  T 
-         * -  ``float`` 
-         * -  ``double`` 
-         * -  ``std::complex<float>`` 
-         * -  ``std::complex<double>`` 
-
-The routine computes the inverse :math:`A^{-1}` of a symmetric positive
-definite or, for complex flavors, Hermitian positive-definite matrix
-:math:`A`. Before calling this routine, call :ref:`onemkl_lapack_potrf`
-to factorize :math:`A`.
-
-potri (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void potri(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates how the input matrix :math:`A` has been    factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper   triangle of :math:`A` is stored.
-
-   If   ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle of :math:`A` is   stored.
-
-n
-   Specifies the order of the matrix    :math:`A` (:math:`0 \le n`).
-
-a
-   Contains the factorization of the matrix :math:`A`, as    returned by   :ref:`onemkl_lapack_potrf`.   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potri_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   Overwritten by the upper or lower triangle of the inverse    of :math:`A`. Specified by ``upper_lower``.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-potri (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates how the input matrix :math:`A` has been    factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper   triangle of :math:`A` is stored.
-
-   If   ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle of :math:`A` is   stored.
-
-n
-   Specifies the order of the matrix    :math:`A` (:math:`0 \le n`).
-
-a
-   Contains the factorization of the matrix :math:`A`, as    returned by   :ref:`onemkl_lapack_potrf`.   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potri_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   Overwritten by the upper or lower triangle of the inverse    of :math:`A`. Specified by ``upper_lower``.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/potri_scratchpad_size.rst b/docs/domains/lapack/potri_scratchpad_size.rst
deleted file mode 100644
index 07b14a341..000000000
--- a/docs/domains/lapack/potri_scratchpad_size.rst
+++ /dev/null
@@ -1,71 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potri_scratchpad_size:
-
-potri_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_potri` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``potri_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potri` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-potri_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_potri` function will be performed.
-
-upper_lower
-   Indicates how the input matrix :math:`A` has been    factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper   triangle of :math:`A` is stored.
-
-   If   ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle of :math:`A` is   stored.
-
-n
-   Specifies the order of the matrix    :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potri` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/potrs.rst b/docs/domains/lapack/potrs.rst
deleted file mode 100644
index 26fbc7c31..000000000
--- a/docs/domains/lapack/potrs.rst
+++ /dev/null
@@ -1,177 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrs:
-
-potrs
-=====
-
-Solves a system of linear equations with a Cholesky-factored
-symmetric (Hermitian) positive-definite coefficient matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``potrs`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine solves for :math:`X` the system of linear equations
-:math:`AX = B` with a symmetric positive-definite or, for complex data,
-Hermitian positive-definite matrix :math:`A`, given the Cholesky
-factorization of :math:`A`:
-
-.. list-table:: 
-   :header-rows: 1
-
-   * -  :math:`A = U^TU` for real data, :math:`A = U^HU` for complex data
-     -  if ``upper_lower=oneapi::mkl::uplo::upper``
-   * -  :math:`A = LL^T` for real data, :math:`A = LL^H` for complex data
-     -  if ``upper_lower=oneapi::mkl::uplo::lower``
-
-where :math:`L` is a lower triangular matrix and :math:`U` is upper
-triangular. The system is solved with multiple right-hand sides
-stored in the columns of the matrix :math:`B`.
-
-Before calling this routine, you must call :ref:`onemkl_lapack_potrf` to compute
-the Cholesky factorization of :math:`A`.
-
-potrs (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void potrs(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t nrhs, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &b, std::int64_t ldb, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates how the input matrix has been factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper triangle   :math:`U` of :math:`A` is stored, where :math:`A` = :math:`U^{T}U`   for real data, :math:`A` = :math:`U^{H}U` for complex data.
-
-   If ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle   :math:`L` of :math:`A` is stored, where :math:`A` = :math:`LL^{T}`   for real data, :math:`A` = :math:`LL^{H}` for complex   data.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-   Buffer containing the factorization of the matrix A, as    returned by   :ref:`onemkl_lapack_potrf`.   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-b
-   The array ``b`` contains the matrix :math:`B` whose columns    are the right-hand sides for the systems of equations. The second   dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potrs_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b
-   Overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-potrs (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t nrhs, T *a, std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates how the input matrix has been factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper triangle   :math:`U` of :math:`A` is stored, where :math:`A` = :math:`U^{T}U`   for real data, :math:`A` = :math:`U^{H}U` for complex data.
-
-   If ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle   :math:`L` of :math:`A` is stored, where :math:`A` = :math:`LL^{T}`   for real data, :math:`A` = :math:`LL^{H}` for complex   data.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-   Pointer to array containing the factorization of the matrix :math:`A`, as    returned by   :ref:`onemkl_lapack_potrf`.   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-b
-   The array ``b`` contains the matrix :math:`B` whose columns    are the right-hand sides for the systems of equations. The second   dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_potrs_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-b
-   Overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
-
diff --git a/docs/domains/lapack/potrs_batch.rst b/docs/domains/lapack/potrs_batch.rst
deleted file mode 100644
index f9c8f6477..000000000
--- a/docs/domains/lapack/potrs_batch.rst
+++ /dev/null
@@ -1,276 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrs_batch:
-
-potrs_batch
-===========
-
-Solves systems of linear equations with Cholesky-factored symmetric (Hermitian) positive-definite coefficient matrices for a batch of problems.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``potrs_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_potrs_batch_buffer:
-
-potrs_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``potrs_batch`` supports only the strided API. 
-   
-**Strided API**
-
- | The routine solves for :math:`X_i` the systems of linear equations :math:`A_iX_i = B_i` with a symmetric positive-definite or, for complex data, Hermitian positive-definite matrices :math:`A_i`, given the Cholesky factorization of :math:`A_i`, :math:`i \in \{1...batch\_size\}`:
- | :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data if ``uplo = mkl::uplo::upper``,
- | :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data if ``uplo = mkl::uplo::lower``,
- | where :math:`L_i` is a lower triangular matrix and :math:`U_i` is upper triangular.
- | The systems are solved with multiple right-hand sides stored in the columns of the matrices :math:`B_i`.
- | Before calling this routine, matrices :math:`A_i` should be factorized by call to the Strided API of the :ref:`onemkl_lapack_potrf_batch_buffer` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void potrs_batch(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<T> &b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
- | Indicates how the input matrices have been factored:
- | If ``uplo = mkl::uplo::upper``, the upper triangle :math:`U_i` of :math:`A_i` is stored, where :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data.
- | If ``uplo = mkl::uplo::lower``, the lower triangle :math:`L_i` of :math:`A_i` is stored, where :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data.
-
-n
-  The order of matrices :math:`A_i` (:math:`0 \le n`).
-
-nrhs
-  The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-a
-  Array containing batch of factorizations of the matrices :math:`A_i`, as returned by the Strided API of the :ref:`onemkl_lapack_potrf_batch_buffer` function.
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices inside the batch array ``a``.
-
-b
-  Array containing batch of matrices :math:`B_i` whose columns are the right-hand sides for the systems of equations.
-
-ldb
-  Leading dimension of :math:`B_i`.
-
-stride_b
-  Stride between the beginnings of matrices :math:`B_i` inside the batch array ``b``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_potrs_batch_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b
-  Solution matrices :math:`X_i`.
-
-.. _onemkl_lapack_potrs_batch_usm:
-
-potrs_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``potrs_batch`` supports the group API and strided API. 
-
-**Group API**
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrs_batch(sycl::queue &queue, mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, T **a, std::int64_t *lda, T **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo  
- | Array of ``group_count`` :math:`\text{uplo}_g` parameters.
- | Each of :math:`\text{uplo}_g` indicates whether the upper or lower triangular parts of the input matrices are provided:
- | If :math:`\text{uplo}_g` is ``mkl::uplo::upper``, input matrices from array ``a`` belonging to group :math:`g` store the upper triangular parts,
- | If :math:`\text{uplo}_g` is ``mkl::uplo::lower``, input matrices from array ``a`` belonging to group :math:`g` store the lower triangular parts.
-
-n
- | Array of ``group_count`` :math:`n_g` parameters.
- | Each :math:`n_g` specifies the order of the input matrices from array ``a`` belonging to group :math:`g`.
-
-nrhs
- | Array of ``group_count`` :math:`\text{nrhs}_g` parameters.
- | Each :math:`\text{nrhs}_g` specifies the number of right-hand sides supplied for group :math:`g` in corresponding part of array ``b``.
-
-a
-  Array of ``batch_size`` pointers to Cholesky factored matrices :math:`A_i` as returned by the Group API of the :ref:`onemkl_lapack_potrf_batch_usm` function.
-
-lda
- | Array of ``group_count`` :math:`\text{lda}_g` parameters.
- | Each :math:`\text{lda}_g` specifies the leading dimensions of the matrices from ``a`` belonging to group :math:`g`.
-
-b
-  Array of ``batch_size`` pointers to right-hand side matrices :math:`B_i`, each of size :math:`\text{ldb}_g \cdot \text{nrhs}_g`, where :math:`g` is an index of group corresponding to :math:`B_i`.
-
-ldb
- | Array of ``group_count`` :math:`\text{ldb}_g` parameters.
- | Each :math:`\text{ldb}_g` specifies the leading dimensions of the matrices from ``b`` belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_potrs_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b
-  Solution matrices :math:`X_i`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
- | The routine solves for :math:`X_i` the systems of linear equations :math:`A_iX_i = B_i` with a symmetric positive-definite or, for complex data, Hermitian positive-definite matrices :math:`A_i`, given the Cholesky factorization of :math:`A_i`, :math:`i \in \{1...batch\_size\}`:
- | :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data if ``uplo = mkl::uplo::upper``,
- | :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data if ``uplo = mkl::uplo::lower``,
- | where :math:`L_i` is a lower triangular matrix and :math:`U_i` is upper triangular.
- | The systems are solved with multiple right-hand sides stored in the columns of the matrices :math:`B_i`.
- | Before calling this routine, matrices :math:`A_i` should be factorized by call to the Strided API of the :ref:`onemkl_lapack_potrf_batch_usm` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event potrs_batch(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, std::int64_t lda, std::int64_t stride_a, T *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
- | Indicates how the input matrices have been factored:
- | If ``uplo = mkl::uplo::upper``, the upper triangle :math:`U_i` of :math:`A_i` is stored, where :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data.
- | If ``uplo = mkl::uplo::lower``, the lower triangle :math:`L_i` of :math:`A_i` is stored, where :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data.
-
-n
-  The order of matrices :math:`A_i` (:math:`0 \le n`).
-
-nrhs
-  The number of right-hand sides (:math:`0 \le nrhs`).
-
-a
-  Array containing batch of factorizations of the matrices :math:`A_i`, as returned by the Strided API of the :ref:`onemkl_lapack_potrf_batch_usm` function.
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices inside the batch array ``a``.
-
-b
-  Array containing batch of matrices :math:`B_i` whose columns are the right-hand sides for the systems of equations.
-
-ldb
-  Leading dimension of :math:`B_i`.
-
-stride_b
-  Stride between the beginnings of matrices :math:`B_i` inside the batch array ``b``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_potrs_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-b
-  Solution matrices :math:`X_i`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/potrs_batch_scratchpad_size.rst b/docs/domains/lapack/potrs_batch_scratchpad_size.rst
deleted file mode 100644
index 6136fd8b2..000000000
--- a/docs/domains/lapack/potrs_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,136 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrs_batch_scratchpad_size:
-
-potrs_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_potrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``potrs_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``float`` 
-      * -  ``double`` 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_potrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-uplo
- | Array of ``group_count`` :math:`\text{uplo}_g` parameters.
- | Each of :math:`\text{uplo}_g` indicates whether the upper or lower triangular parts of the input matrices are provided:
- | If :math:`\text{uplo}_g` is ``mkl::uplo::upper``, input matrices from array ``a`` belonging to group :math:`g` store the upper triangular parts,
- | If :math:`\text{uplo}_g` is ``mkl::uplo::lower``, input matrices from array ``a`` belonging to group :math:`g` store the lower triangular parts.
-
-n
- | Array of ``group_count`` :math:`n_g` parameters.
- | Each :math:`n_g` specifies the order of the input matrices belonging to group :math:`g`.
-
-nrhs
- | Array of ``group_count`` :math:`\text{nrhs}_g` parameters.
- | Each :math:`\text{nrhs}_g` specifies the number of right-hand sides supplied for group :math:`g`.
-
-lda
- | Array of ``group_count`` :math:`\text{lda}_g` parameters.
- | Each :math:`\text{lda}_g` specifies the leading dimensions of the matrices belonging to group :math:`g`.
-
-ldb
- | Array of ``group_count`` :math:`\text{ldb}_g` parameters.
- | Each :math:`\text{ldb}_g` specifies the leading dimensions of the matrices belonging to group :math:`g`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_potrs_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_potrs_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-uplo
- | Indicates how the input matrices have been factored:
- | If ``uplo = mkl::uplo::upper``, the upper triangle :math:`U_i` of :math:`A_i` is stored, where :math:`A_i = U_i^TU_i` for real data, :math:`A_i = U_i^HU_i` for complex data.
- | If ``uplo = mkl::uplo::lower``, the lower triangle :math:`L_i` of :math:`A_i` is stored, where :math:`A_i = L_iL_i^T` for real data, :math:`A_i = L_iL_i^H` for complex data.
-
-n
-  Order of matrices :math:`A_i` (:math:`0 \le n`).
-
-nrhs  
-  Number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-lda
-  Leading dimension of :math:`A_i`.
-
-stride_a
-  Stride between the beginnings of matrices inside the batch array ``a``.
-
-ldb
-  Leading dimension of :math:`B_i`.
-
-stride_b
-  Stride between the beginnings of matrices :math:`B_i` inside the batch array ``b``.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_potrs_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/potrs_scratchpad_size.rst b/docs/domains/lapack/potrs_scratchpad_size.rst
deleted file mode 100644
index 125d68944..000000000
--- a/docs/domains/lapack/potrs_scratchpad_size.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_potrs_scratchpad_size:
-
-potrs_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_potrs` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``potrs_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-    
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potrs` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-potrs_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_potrs` function will be performed.
-
-upper_lower
-   Indicates how the input matrix has been factored:
-
-   If ``upper_lower = oneapi::mkl::uplo::upper``, the upper triangle   :math:`U` of :math:`A` is stored, where :math:`A = U^{T}U`   for real data, :math:`A = U^{H}U` for complex data.
-
-   If ``upper_lower = oneapi::mkl::uplo::lower``, the lower triangle   :math:`L` of :math:`A` is stored, where :math:`A = LL^{T}`   for real data, :math:`A = LL^{H}` for complex   data.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-nrhs
-   The number of right-hand sides (:math:`0 \le nrhs`).
-
-lda
-   The leading dimension of ``a``.
-
-ldb
-   The leading dimension of ``b``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_potrs` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/syevd.rst b/docs/domains/lapack/syevd.rst
deleted file mode 100644
index 93df0ba27..000000000
--- a/docs/domains/lapack/syevd.rst
+++ /dev/null
@@ -1,186 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_syevd:
-
-syevd
-=====
-
-Computes all eigenvalues and, optionally, all eigenvectors of a real
-symmetric matrix using divide and conquer algorithm.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``syevd`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-
-The routine computes all the eigenvalues, and optionally all the
-eigenvectors, of a real symmetric matrix :math:`A`. In other words, it
-can compute the spectral factorization of :math:`A` as: :math:`A = Z\Lambda Z^T`.
-
-Here :math:`\Lambda` is a diagonal matrix whose diagonal elements are the
-eigenvalues :math:`\lambda_i`, and :math:`Z` is the orthogonal matrix whose
-columns are the eigenvectors :math:`z_{i}`. Thus,
-
-:math:`A z_i = \lambda_i z_i` for :math:`i = 1, 2, ..., n`.
-
-If the eigenvectors are requested, then this routine uses a divide
-and conquer algorithm to compute eigenvalues and eigenvectors.
-However, if only eigenvalues are required, then it uses the
-Pal-Walker-Kahan variant of the QL or QR algorithm.
-
-syevd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void syevd(sycl::queue &queue, jobz jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &w, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The buffer ``a``, size (``lda,*``). The buffer ``a`` contains the matrix
-   :math:`A`. The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``. Must be at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_syevd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   If ``jobz = job::vec``, then on exit this buffer is overwritten by
-   the orthogonal matrix :math:`Z` which contains the eigenvectors of
-   :math:`A`.
-
-w
-   Buffer, size at least :math:`n`. Contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-syevd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event syevd(sycl::queue &queue, jobz jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *w, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-a
-   Pointer to array containing :math:`A`, size (``lda,*``).
-   The second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``. Must be at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_syevd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   If ``jobz = job::vec``, then on exit this array is overwritten by
-   the orthogonal matrix :math:`Z` which contains the eigenvectors of
-   :math:`A`.
-
-w
-   Pointer to array of size at least :math:`n`. Contains the eigenvalues
-   of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/syevd_scratchpad_size.rst b/docs/domains/lapack/syevd_scratchpad_size.rst
deleted file mode 100644
index e9d08fc18..000000000
--- a/docs/domains/lapack/syevd_scratchpad_size.rst
+++ /dev/null
@@ -1,81 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_syevd_scratchpad_size:
-
-syevd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_syevd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``syevd_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_syevd` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-syevd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_syevd` function will be performed.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``. Currently ``lda`` is not referenced in
-   this function.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_syevd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/sygvd.rst b/docs/domains/lapack/sygvd.rst
deleted file mode 100644
index 797fdf3a6..000000000
--- a/docs/domains/lapack/sygvd.rst
+++ /dev/null
@@ -1,249 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sygvd:
-
-sygvd
-=====
-
-Computes all eigenvalues and, optionally, eigenvectors of a real
-generalized symmetric definite eigenproblem using a divide and
-conquer method.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``sygvd`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-The routine computes all the eigenvalues, and optionally, the
-eigenvectors of a real generalized symmetric-definite eigenproblem,
-of the form
-
-:math:`Ax = \lambda Bx`, :math:`ABx = \lambda x`, or :math:`BAx = \lambda x` .
-
-Here :math:`A` and :math:`B` are assumed to be symmetric and :math:`B` is also
-positive definite.
-
-It uses a divide and conquer algorithm.
-
-sygvd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &b, std::int64_t ldb, sycl::buffer<T,1> &w, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax =  \lambda Bx`;
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x`;
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` :math:`(0 \le n)`.
-
-a
-   Buffer ``a``, size ``(lda,*)``. Contains the upper or lower triangle
-   of the symmetric matrix :math:`A`, as specified by ``upper_lower``. The
-   second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1, n)`.
-
-b
-   Buffer ``b``, size ``(ldb,*)``. Contains the upper or lower triangle
-   of the symmetric matrix :math:`B`, as specified by ``upper_lower``. The
-   second dimension of ``b`` must be at least :math:`\max(1, n)`.
-
-ldb
-   The leading dimension of ``b``; at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sygvd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   On exit, if ``jobz = job::vec``, then if :math:`\text{info} = 0`, ``a``
-   contains the matrix :math:`Z` of eigenvectors. The eigenvectors are
-   normalized as follows:
-
-   if :math:`\text{itype} = 1` or :math:`2` , :math:`Z^{T}BZ = I`;
-
-   if :math:`\text{itype} = 3` , :math:`Z^{T}B^{-1}Z = I`;
-
-   If ``jobz = job::novec``, then on exit the upper triangle (if
-   ``upper_lower = uplo::upper``) or the lower triangle (if
-   ``upper_lower = uplo::lower``) of :math:`A`, including the diagonal,
-   is destroyed.
-
-b
-   On exit, if :math:`\text{info} \le n`, the part of ``b`` containing the matrix is
-   overwritten by the triangular factor :math:`U` or :math:`L` from the
-   Cholesky factorization :math:`B = U^{T}U` or
-   :math:`B = LL^{T}`.
-
-w
-   Buffer, size at least :math:`n`. If :math:`\text{info} = 0`, contains the
-   eigenvalues of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-sygvd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *b, std::int64_t ldb, T *w, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax =  \lambda Bx`;
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x`;
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` :math:`(0 \le n)`.
-
-a
-   Pointer to array ``a`` of size ``(lda,*)`` containing the upper or lower triangle
-   of the symmetric matrix :math:`A`, as specified by ``upper_lower``. The
-   second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1, n)`.
-
-b
-   Pointer to array ``b`` of size ``(ldb,*)`` containing the upper or lower triangle
-   of the symmetric matrix :math:`B`, as specified by ``upper_lower``. The
-   second dimension of ``b`` must be at least :math:`\max(1, n)`.
-
-ldb
-   The leading dimension of ``b``; at least :math:`\max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sygvd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit, if ``jobz = job::vec``, then if :math:`\text{info} = 0`, ``a``
-   contains the matrix :math:`Z` of eigenvectors. The eigenvectors are
-   normalized as follows:
-
-   if :math:`\text{itype} = 1` or :math:`2`, :math:`Z^{T}BZ = I`;
-   
-   if :math:`\text{itype} = 3`, :math:`Z^{T}B^{-1}Z = I`;
-
-   If ``jobz = job::novec``, then on exit the upper triangle (if
-   ``upper_lower = uplo::upper``) or the lower triangle (if
-   ``upper_lower = uplo::lower``) of :math:`A`, including the diagonal,
-   is destroyed.
-
-b
-   On exit, if :math:`\text{info} \le n`, the part of ``b`` containing the matrix is
-   overwritten by the triangular factor :math:`U` or :math:`L` from the
-   Cholesky factorization :math:`B = U^{T}U` or
-   :math:`B = LL^{T}`.
-
-w
-   Pointer to array of size at least ``n``. If :math:`\text{info} = 0`, contains the
-   eigenvalues of the matrix :math:`A` in ascending order.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/sygvd_scratchpad_size.rst b/docs/domains/lapack/sygvd_scratchpad_size.rst
deleted file mode 100644
index d14ef8f61..000000000
--- a/docs/domains/lapack/sygvd_scratchpad_size.rst
+++ /dev/null
@@ -1,92 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sygvd_scratchpad_size:
-
-sygvd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_sygvd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``sygvd_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sygvd` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-sygvd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda, std::int64_t ldb) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_sygvd` function will be performed.
-
-itype
-   Must be 1 or 2 or 3. Specifies the problem type to be solved:
-
-   if :math:`\text{itype} = 1`, the problem type is :math:`Ax = \lambda Bx`;
-
-   if :math:`\text{itype} = 2`, the problem type is :math:`ABx = \lambda x`;
-
-   if :math:`\text{itype} = 3`, the problem type is :math:`BAx = \lambda x`.
-
-jobz
-   Must be ``job::novec`` or ``job::vec``.
-
-   If ``jobz = job::novec``, then only eigenvalues are computed.
-
-   If ``jobz = job::vec``, then eigenvalues and eigenvectors are
-   computed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` and ``b`` store the upper
-   triangular part of :math:`A` and :math:`B`.
-
-   If ``upper_lower = uplo::lower``, ``a`` and ``b`` store the lower
-   triangular part of :math:`A` and :math:`B`.
-
-n
-   The order of the matrices :math:`A` and :math:`B` :math:`(0 \le n)`.
-
-lda
-   The leading dimension of ``a``.
-
-ldb
-   The leading dimension of ``b``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sygvd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/sytrd.rst b/docs/domains/lapack/sytrd.rst
deleted file mode 100644
index 2df61c2b6..000000000
--- a/docs/domains/lapack/sytrd.rst
+++ /dev/null
@@ -1,205 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sytrd:
-
-sytrd
-=====
-
-Reduces a real symmetric matrix to tridiagonal form.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``sytrd`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-
-The routine reduces a real symmetric matrix :math:`A` to symmetric
-tridiagonal form :math:`T` by an orthogonal similarity transformation:
-:math:`A = QTQ^T`. The orthogonal matrix :math:`Q` is not formed explicitly
-but is represented as a product of :math:`n-1` elementary reflectors.
-Routines are provided for working with :math:`Q` in this representation.
-
-sytrd (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void sytrd(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &d, sycl::buffer<T,1> &e, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper
-   triangular part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower
-   triangular part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   The buffer ``a``, size ``(lda,*)``. Contains the upper or lower
-   triangle of the symmetric matrix :math:`A`, as specified by
-   ``upper_lower``.
-
-   The second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sytrd_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   On exit,
-
-   if ``upper_lower = uplo::upper``, the diagonal and first
-   superdiagonal of :math:`A` are overwritten by the corresponding
-   elements of the tridiagonal matrix :math:`T`, and the elements above
-   the first superdiagonal, with the buffer ``tau``, represent the
-   orthogonal matrix :math:`Q` as a product of elementary reflectors;
-
-   if ``upper_lower = uplo::lower``, the diagonal and first
-   subdiagonal of :math:`A` are overwritten by the corresponding elements
-   of the tridiagonal matrix :math:`T`, and the elements below the first
-   subdiagonal, with the buffer ``tau``, represent the orthogonal matrix
-   :math:`Q` as a product of elementary reflectors.
-
-d
-   Buffer containing the diagonal elements of the matrix :math:`T`. The
-   dimension of ``d`` must be at least :math:`\max(1, n)`.
-
-e
-   Buffer containing the off diagonal elements of the matrix :math:`T`.
-   The dimension of ``e`` must be at least :math:`\max(1, n-1)`.
-
-tau
-   Buffer, size at least :math:`\max(1, n)`. Stores :math:`(n-1)` scalars that
-   define elementary reflectors in decomposition of the orthogonal
-   matrix :math:`Q` in a product of :math:`n-1` elementary reflectors.
-   :math:`\tau(n)` is used as workspace.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-sytrd (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *d, T *e, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, ``a`` stores the upper
-   triangular part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, ``a`` stores the lower
-   triangular part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-a
-   The pointer to matrix :math:`A`, size ``(lda,*)``. Contains the upper or lower
-   triangle of the symmetric matrix :math:`A`, as specified by
-   ``upper_lower``.
-   The second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a``; at least :math:`\max(1,n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sytrd_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-   
-a
-   On exit,
-
-   if ``upper_lower = uplo::upper``, the diagonal and first
-   superdiagonal of :math:`A` are overwritten by the corresponding
-   elements of the tridiagonal matrix :math:`T`, and the elements above
-   the first superdiagonal, with the array ``tau``, represent the
-   orthogonal matrix :math:`Q` as a product of elementary reflectors;
-
-   if ``upper_lower = uplo::lower``, the diagonal and first
-   subdiagonal of :math:`A` are overwritten by the corresponding elements
-   of the tridiagonal matrix :math:`T`, and the elements below the first
-   subdiagonal, with the array ``tau``, represent the orthogonal matrix
-   :math:`Q` as a product of elementary reflectors.
-
-d
-   Pointer to diagonal elements of the matrix :math:`T`. The
-   dimension of ``d`` must be at least :math:`\max(1, n)`.
-
-e
-   Pointer to off diagonal elements of the matrix :math:`T`.
-   The dimension of ``e`` must be at least :math:`\max(1, n-1)`.
-
-tau
-   Pointer to array of size at least :math:`\max(1, n)`. Stores :math:`(n-1)` scalars that
-   define elementary reflectors in decomposition of the orthogonal
-   matrix :math:`Q` in a product of :math:`n-1` elementary reflectors.
-   :math:`\tau(n)` is used as workspace.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/sytrd_scratchpad_size.rst b/docs/domains/lapack/sytrd_scratchpad_size.rst
deleted file mode 100644
index b3db401f1..000000000
--- a/docs/domains/lapack/sytrd_scratchpad_size.rst
+++ /dev/null
@@ -1,72 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sytrd_scratchpad_size:
-
-sytrd_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_sytrd` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``sytrd_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double``
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sytrd` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-sytrd_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_sytrd` function will be performed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``.
-
-   If ``upper_lower = uplo::upper``, a stores the upper triangular
-   part of :math:`A`.
-
-   If ``upper_lower = uplo::lower``, a stores the lower triangular
-   part of :math:`A`.
-
-n
-   The order of the matrix :math:`A` :math:`(0 \le n)`.
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sytrd` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/sytrf.rst b/docs/domains/lapack/sytrf.rst
deleted file mode 100644
index 0baea1476..000000000
--- a/docs/domains/lapack/sytrf.rst
+++ /dev/null
@@ -1,166 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sytrf:
-
-sytrf
-=====
-
-Computes the Bunch-Kaufman factorization of a symmetric matrix.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``sytrf`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine computes the factorization of a real/complex symmetric
-matrix :math:`A` using the Bunch-Kaufman diagonal pivoting method. The
-form of the factorization is:
-
--  if ``upper_lower=uplo::upper``, :math:`A` = :math:`UDU^{T}`
-
--  if ``upper_lower=uplo::lower``, :math:`A` = :math:`LDL^{T}`
-
-where :math:`A` is the input matrix, :math:`U` and :math:`L` are products of
-permutation and triangular matrices with unit diagonal (upper
-triangular for :math:`U` and lower triangular for :math:`L`), and :math:`D` is a
-symmetric block-diagonal matrix with :math:`1 \times 1` and :math:`2 \times 2` diagonal
-blocks. :math:`U` and :math:`L` have :math:`2 \times 2` unit diagonal blocks
-corresponding to the :math:`2 \times 2` blocks of :math:`D`.
-
-sytrf (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void sytrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<std::int64_t,1> &ipiv, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of    :math:`A` is stored and how :math:`A` is factored:
-
-      If   ``upper_lower=uplo::upper``, the buffer ``a`` stores the upper triangular   part of the matrix :math:`A`, and :math:`A` is factored as :math:`UDU^T`.
-
-      If ``upper_lower=uplo::lower``, the buffer ``a`` stores   the lower triangular part of the matrix :math:`A`, and :math:`A` is factored   as :math:`LDL^T`.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The buffer ``a``, size :math:`\max(1,lda \cdot n)`. The buffer ``a``    contains either the upper or the lower triangular part of the matrix   :math:`A` (see ``upper_lower``). The second dimension of ``a`` must be at   least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sytrf_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   The upper or lower triangular part of a is overwritten by    details of the block-diagonal matrix :math:`D` and the multipliers used   to obtain the factor :math:`U` (or :math:`L`).
-
-ipiv
-   Buffer, size at least :math:`\max(1, n)`. Contains details of    the interchanges and the block structure of :math:`D`. If   :math:`\text{ipiv}(i)=k>0`, then :math:`d_{ii}` is a :math:`1 \times 1` block, and the   :math:`i`-th row and column of :math:`A` was interchanged with the :math:`k`-th   row and column.
-
-      If ``upper_lower=oneapi::mkl::uplo::upper``   and :math:`\text{ipiv}(i)=\text{ipiv}(i-1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i-1`, and (:math:`i-1`)-th row and column of   :math:`A` was interchanged with the :math:`m`-th row and   column.
-
-      If ``upper_lower=oneapi::mkl::uplo::lower`` and   :math:`\text{ipiv}(i)=\text{ipiv}(i+1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i+1`, and (:math:`i+1`)-th row and column   of :math:`A` was interchanged with the :math:`m`-th row and column.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-sytrf (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of    :math:`A` is stored and how :math:`A` is factored:
-
-      If   ``upper_lower=uplo::upper``, the array ``a`` stores the upper triangular   part of the matrix :math:`A`, and :math:`A` is factored as :math:`UDU^T`.
-
-      If ``upper_lower=uplo::lower``, the array ``a`` stores   the lower triangular part of the matrix :math:`A`, and :math:`A` is factored   as :math:`LDL^T`.
-
-n
-   The order of matrix :math:`A` (:math:`0 \le n`).
-
-a
-   The pointer to :math:`A`, size :math:`\max(1,\text{lda} \cdot n)`, containing either the upper or the lower triangular part of the matrix   :math:`A` (see ``upper_lower``). The second dimension of ``a`` must be at   least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_sytrf_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   The upper or lower triangular part of a is overwritten by    details of the block-diagonal matrix :math:`D` and the multipliers used   to obtain the factor :math:`U` (or :math:`L`).
-
-ipiv
-   Pointer to array of size at least :math:`\max(1, n)`. Contains details of    the interchanges and the block structure of :math:`D`. If   :math:`\text{ipiv}(i)=k>0`, then :math:`d_{ii}` is a :math:`1 \times 1` block, and the   :math:`i`-th row and column of :math:`A` was interchanged with the :math:`k`-th   row and column.
-
-      If ``upper_lower=oneapi::mkl::uplo::upper``   and :math:`\text{ipiv}(i)=\text{ipiv}(i-1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i-1`, and (:math:`i-1`)-th row and column of   :math:`A` was interchanged with the :math:`m`-th row and   column.
-      
-      If ``upper_lower=oneapi::mkl::uplo::lower`` and   :math:`\text{ipiv}(i)=\text{ipiv}(i+1)=-m<0`, then :math:`D` has a :math:`2 \times 2` block in   rows/columns :math:`i` and :math:`i+1`, and (:math:`i+1`)-th row and column   of :math:`A` was interchanged with the :math:`m`-th row and column.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/sytrf_scratchpad_size.rst b/docs/domains/lapack/sytrf_scratchpad_size.rst
deleted file mode 100644
index 5b56c6385..000000000
--- a/docs/domains/lapack/sytrf_scratchpad_size.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_sytrf_scratchpad_size:
-
-sytrf_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_sytrf` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``sytrf_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``float`` 
-        * -  ``double`` 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sytrf` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-sytrf_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_sytrf` function will be performed.
-
-upper_lower
-   Indicates whether the upper or lower triangular part of :math:`A` is
-   stored and how :math:`A` is factored:
-
-   If ``upper_lower=uplo::upper``, the buffer ``a`` stores the
-   upper triangular part of the matrix :math:`A`, and :math:`A` is
-   factored as :math:`UDU^T`.
-
-   If ``upper_lower=uplo::lower``, the buffer ``a`` stores the
-   lower triangular part of the matrix :math:`A`, and :math:`A` is
-   factored as :math:`LDL^T`.
-
-n
-   The order of the matrix :math:`A` (:math:`0 \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_sytrf` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/trtrs.rst b/docs/domains/lapack/trtrs.rst
deleted file mode 100644
index 8f980465b..000000000
--- a/docs/domains/lapack/trtrs.rst
+++ /dev/null
@@ -1,197 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_trtrs:
-
-trtrs
-=====
-
-Solves a system of linear equations with a triangular coefficient
-matrix, with multiple right-hand sides.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``trtrs`` supports the following precisions.
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -  T 
-         * -  ``float`` 
-         * -  ``double`` 
-         * -  ``std::complex<float>`` 
-         * -  ``std::complex<double>`` 
-
-The routine solves for :math:`X` the following systems of linear
-equations with a triangular matrix :math:`A`, with multiple right-hand
-sides stored in :math:`B`:
-
-    .. list-table::
-       :header-rows: 1
- 
-       * -     :math:`AX = B`
-         -
-         -     if ``transa`` =\ ``transpose::nontrans``,
-       * -     \ :math:`A^TX = B`\
-         -
-         -     if ``transa`` =\ ``transpose::trans``,
-       * -     :math:`A^HX = B`
-         -
-         -     if ``transa`` =\ ``transpose::conjtrans`` (for complex    matrices only).
-
-trtrs (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void trtrs(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &b, std::int64_t ldb, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether :math:`A` is upper or lower    triangular:
-
-      If upper_lower = ``uplo::upper``, then   :math:`A` is upper triangular.
-
-      If upper_lower =   ``uplo::lower``, then :math:`A` is lower triangular.
-
-transa
-   If transa = ``transpose::nontrans``, then    :math:`AX = B` is solved for :math:`X`.
-
-   If   transa = ``transpose::trans``, then :math:`A^{T}X = B` is solved for :math:`X`.
-
-   If transa =   ``transpose::conjtrans``, then :math:`A^{H}X = B` is   solved for :math:`X`.
-
-unit_diag
-   If unit_diag = ``diag::nonunit``, then :math:`A` is not a    unit triangular matrix.
-
-   If unit_diag = ``diag::unit``,   then :math:`A` is unit triangular: diagonal elements of :math:`A` are assumed   to be 1 and not referenced in the array ``a``.
-
-n
-   The order of :math:`A`; the number of rows in :math:`B`;    :math:`n \ge 0`.
-
-nrhs
-   The number of right-hand sides; :math:`\text{nrhs} \ge 0`.
-
-a
-   Buffer containing the matrix :math:`A`.      The    second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a``;    :math:`\text{lda} \ge \max(1, n)`.
-
-b
-   Buffer containing the matrix :math:`B` whose columns are the    right-hand sides for the systems of equations.      The   second dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``; :math:`\text{ldb} \ge \max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_trtrs_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-b
-   Overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-trtrs (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t nrhs, T *a, std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Indicates whether :math:`A` is upper or lower    triangular:
-
-      If upper_lower = ``uplo::upper``, then   :math:`A` is upper triangular.
-
-      If upper_lower =   ``uplo::lower``, then :math:`A` is lower triangular.
-
-transa
-   If transa = ``transpose::nontrans``, then    :math:`AX = B` is solved for :math:`X`.
-
-   If   transa = ``transpose::trans``, then :math:`A^{T}X = B` is solved for :math:`X`.
-
-   If transa =   ``transpose::conjtrans``, then :math:`A^{H}X = B` is   solved for :math:`X`.
-
-unit_diag
-   If unit_diag = ``diag::nonunit``, then :math:`A` is not a    unit triangular matrix.
-
-   If unit_diag = ``diag::unit``,   then :math:`A` is unit triangular: diagonal elements of :math:`A` are assumed   to be 1 and not referenced in the array ``a``.
-
-n
-   The order of :math:`A`; the number of rows in :math:`B`;    :math:`n \ge 0`.
-
-nrhs
-   The number of right-hand sides; :math:`\text{nrhs} \ge 0`.
-
-a
-   Array containing the matrix :math:`A`.      The    second dimension of ``a`` must be at least :math:`\max(1,n)`.
-
-lda
-   The leading dimension of ``a``;    :math:`\text{lda} \ge \max(1, n)`.
-
-b
-   Array containing the matrix :math:`B` whose columns are the    right-hand sides for the systems of equations.      The   second dimension of ``b`` must be at least :math:`\max(1,\text{nrhs})`.
-
-ldb
-   The leading dimension of ``b``; :math:`\text{ldb} \ge \max(1, n)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_trtrs_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-b
-   Overwritten by the solution matrix :math:`X`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/trtrs_scratchpad_size.rst b/docs/domains/lapack/trtrs_scratchpad_size.rst
deleted file mode 100644
index 150800492..000000000
--- a/docs/domains/lapack/trtrs_scratchpad_size.rst
+++ /dev/null
@@ -1,94 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_trtrs_scratchpad_size:
-
-trtrs_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_trtrs` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``trtrs_scratchpad_size`` supports the following precisions.
-
-    .. list-table:: 
-       :header-rows: 1
-
-       * -  T 
-       * -  ``float`` 
-       * -  ``double`` 
-       * -  ``std::complex<float>`` 
-       * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_trtrs` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-trtrs_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_trtrs` function will be performed.
-
-upper_lower
-   Indicates whether :math:`A` is upper or lower    triangular:
-
-   If upper_lower = ``uplo::upper``, then   :math:`A` is upper triangular.
-
-   If upper_lower =   ``uplo::lower``, then :math:`A` is lower triangular.
-
-trans
-   Indicates the form of the equations:
-
-   If ``trans=oneapi::mkl::transpose::nontrans``, then :math:`AX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::trans``, then :math:`A^TX = B` is solved
-   for :math:`X`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, then :math:`A^HX = B` is
-   solved for :math:`X`.
-
-diag
-   If diag = ``oneapi::mkl::diag::nonunit``, then :math:`A` is not a    unit triangular matrix.
-
-   If diag = ``oneapi::mkl::diag::unit``,   then :math:`A` is unit triangular: diagonal elements of :math:`A` are assumed   to be 1 and not referenced in the array ``a``.
-
-n
-   The order of :math:`A`; the number of rows in :math:`B`;    :math:`n \ge 0`.
-
-nrhs
-   The number of right-hand sides (:math:`0 \le \text{nrhs}`).
-
-lda
-   The leading dimension of ``a``; :math:`\text{lda} \ge \max(1, n)`.
-
-ldb
-   The leading dimension of ``b``; :math:`\text{ldb} \ge \max(1, n)`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_trtrs` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/ungbr.rst b/docs/domains/lapack/ungbr.rst
deleted file mode 100644
index 671d169f5..000000000
--- a/docs/domains/lapack/ungbr.rst
+++ /dev/null
@@ -1,231 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungbr:
-
-ungbr
-=====
-
-Generates the complex unitary matrix :math:`Q` or :math:`P^{H}` determined by
-:ref:`onemkl_lapack_gebrd`.
-
-.. container:: section
-
-  .. rubric:: Description
-     
-``ungbr`` supports the following precisions.
-
-      .. list-table:: 
-         :header-rows: 1
-
-         * -  T 
-         * -  ``std::complex<float>`` 
-         * -  ``std::complex<double>`` 
-
-The routine generates the whole or part of the unitary matrices :math:`Q`
-and :math:`P^{H}` formed by the routines
-:ref:`onemkl_lapack_gebrd`.
-All valid combinations of arguments are described in *Input Parameters*; in
-most cases you need the following:
-
-To compute the whole :math:`m \times m` matrix :math:`Q`, use:
-
-::
-
-   oneapi::mkl::lapack::ungbr(queue, generate::q, m, m, n, a, ...)
-
-(note that the buffer ``a`` must have at least :math:`m` columns).
-
-To form the :math:`n` leading columns of :math:`Q` if :math:`m > n`, use:
-
-::
-
-   oneapi::mkl::lapack::ungbr(queue, generate::q, m, n, n, a, ...)
-
-To compute the whole :math:`n \times n` matrix :math:`P^{T}`, use:
-
-::
-
-   oneapi::mkl::lapack::ungbr(queue, generate::p, n, n, m, a, ...)
-
-(note that the array ``a`` must have at least :math:`n` rows).
-
-To form the :math:`m` leading rows of :math:`P^{T}` if :math:`m < n`, use:
-
-::
-
-   oneapi::mkl::lapack::ungbr(queue, generate::p, m, n, m, a, ...)
-
-ungbr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ungbr(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le n)`. See ``m`` for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-tau
-   For ``gen = generate::q``, the array ``tauq`` as returned by :ref:`onemkl_lapack_gebrd`.
-   For ``gen = generate::p``, the array ``taup`` as returned by :ref:`onemkl_lapack_gebrd`.
-
-   The dimension of ``tau`` must be at least :math:`\max(1, \min(m, k))` for
-   ``gen = generate::q``, or :math:`\max(1, \min(n, k))` for
-   ``gen = generate::p``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type :math:`T`.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungbr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m` unitary matrix
-   :math:`Q` or :math:`P^{T}`, (or the leading rows or columns thereof)
-   as specified by ``gen``, ``m``, and ``n``.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ungbr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le n)`. See ``m`` for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-tau
-   For ``gen = generate::q``, the array ``tauq`` as returned by :ref:`onemkl_lapack_gebrd`.
-   For ``gen = generate::p``, the array ``taup`` as returned by :ref:`onemkl_lapack_gebrd`.
-
-   The dimension of ``tau`` must be at least :math:`\max(1, \min(m, k))` for
-   ``gen = generate::q``, or :math:`\max(1, \min(n, k))` for
-   ``gen = generate::p``.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type :math:`T`.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungbr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m` unitary matrix
-   :math:`Q` or :math:`P^{T}`, (or the leading rows or columns thereof)
-   as specified by ``gen``, ``m``, and ``n``.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-         
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/ungbr_scratchpad_size.rst b/docs/domains/lapack/ungbr_scratchpad_size.rst
deleted file mode 100644
index 5a39e9cc1..000000000
--- a/docs/domains/lapack/ungbr_scratchpad_size.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungbr_scratchpad_size:
-
-ungbr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ungbr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``ungbr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type :math:`T` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungbr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-ungbr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate gen, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_ungbr` function will be performed.
-
-gen
-   Must be ``generate::q`` or ``generate::p``.
-
-   If ``gen = generate::q``, the routine generates the matrix
-   :math:`Q`.
-
-   If ``gen = generate::p``, the routine generates the matrix
-   :math:`P^{T}`.
-
-m
-   The number of rows in the matrix :math:`Q` or :math:`P^{T}` to be
-   returned :math:`(0 \le m)`.
-
-   If ``gen = generate::q``, :math:`m \ge n \ge \min(m, k)`.
-
-   If ``gen = generate::p``, :math:`n \ge m \ge \min(n, k)`.
-
-n
-   The number of columns in the matrix :math:`Q` or :math:`P^{T}` to
-   be returned :math:`(0 \le n)`. See m for constraints.
-
-k
-   If ``gen = generate::q``, the number of columns in the original
-   :math:`m \times k` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-   If ``gen = generate::p``, the number of rows in the original
-   :math:`k \times n` matrix reduced by
-   :ref:`onemkl_lapack_gebrd`.
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungbr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/ungqr.rst b/docs/domains/lapack/ungqr.rst
deleted file mode 100644
index 044546adf..000000000
--- a/docs/domains/lapack/ungqr.rst
+++ /dev/null
@@ -1,181 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungqr:
-
-ungqr
-=====
-
-Generates the complex unitary matrix :math:`Q` of the QR factorization formed
-by :ref:`onemkl_lapack_geqrf`.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``ungqr`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine generates the whole or part of :math:`m \times m` unitary
-matrix :math:`Q` of the QR factorization formed by the routines
-:ref:`onemkl_lapack_geqrf`.
-
-Usually :math:`Q` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A` with :math:`m \ge p`. To compute the whole matrix
-:math:`Q`, use:
-
-::
-
-    oneapi::mkl::lapack::ungqr(queue, m, m, p, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the leading :math:`p` columns of :math:`Q` (which form an
-orthonormal basis in the space spanned by the columns of :math:`A`):
-
-::
-
-    oneapi::mkl::lapack::ungqr(queue, m, p, p, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the matrix :math:`Q^{k}` of the QR factorization of
-the leading :math:`k` columns of the matrix :math:`A`:
-
-::
-
-    oneapi::mkl::lapack::ungqr(queue, m, m, k, a, lda, tau, scratchpad, scratchpad_size)
-
-To compute the leading :math:`k` columns of :math:`Q^{k}` (which form
-an orthonormal basis in the space spanned by the leading :math:`k`
-columns of the matrix :math:`A`):
-
-::
-
-    oneapi::mkl::lapack::ungqr(queue, m, k, k, a, lda, tau, scratchpad, scratchpad_size)
-
-ungqr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-lda
-   The leading dimension of ``a`` (:math:`m \le \text{lda}`).
-
-tau
-   The buffer ``tau`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungqr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m`
-   unitary matrix :math:`Q`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ungqr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-lda
-   The leading dimension of ``a`` (:math:`m \le \text{lda}`).
-
-tau
-   The pointer to ``tau`` as returned by
-   :ref:`onemkl_lapack_geqrf`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungqr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-a
-   Overwritten by :math:`n` leading columns of the :math:`m \times m`
-   unitary matrix :math:`Q`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/ungqr_batch.rst b/docs/domains/lapack/ungqr_batch.rst
deleted file mode 100644
index 0d69e33b8..000000000
--- a/docs/domains/lapack/ungqr_batch.rst
+++ /dev/null
@@ -1,274 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungqr_batch:
-
-ungqr_batch
-===========
-
-Generates the complex unitary matrices :math:`Q_i` of the batch of QR factorizations formed by the :ref:`onemkl_lapack_geqrf_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ungqr_batch`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-.. _onemkl_lapack_ungqr_batch_buffer:
-
-ungqr_batch (Buffer Version)
-----------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The buffer version of ``ungqr_batch`` supports only the strided API. 
-   
-**Strided API**
-
- | The routine generates the whole or parts of the :math:`m \times m` unitary matrices :math:`Q_i` of the batch of QR factorizations formed by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``ungqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``ungqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of the leading :math:`k` columns of the matrices :math:`A_i`:
- | ``ungqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``ungqr_batch(queue, m, k, k, a, ...)``
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T> &a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<T> &tau, std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<T> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices :math:`A_i` (:math:`0\le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-a
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`m \le \text{lda}`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-tau
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_buffer` function.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size 
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_ungqr_batch_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-  Array data is overwritten by a batch of :math:`n` leading columns of the :math:`m \times m` unitary matrices :math:`Q_i`.
-
-.. _onemkl_lapack_ungqr_batch_usm:
-
-ungqr_batch (USM Version)
--------------------------
-
-.. container:: section
-
-  .. rubric:: Description
-
-The USM version of ``ungqr_batch`` supports the group API and strided API. 
-
-**Group API**
-
- | The routine generates the whole or parts of the :math:`m \times m` unitary matrices :math:`Q_i` of the batch of QR factorizations formed by the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``ungqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``ungqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of the leading :math:`k` columns of the matrices :math:`A_i`:
- | ``ungqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``ungqr_batch(queue, m, k, k, a, ...)``
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, std::int64_t *lda, T **tau, std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` :math:`m_g` parameters as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-k
- | Array of ``group_count`` :math:`k_g` parameters as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
- | The number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k_g \le n_g`).
-
-a
-  Array resulting after call to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-lda
-  Array of leading dimensions of :math:`A_i` as previously supplied to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-tau
-  Array resulting after call to the Group API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Group API of the :ref:`onemkl_lapack_ungqr_batch_scratchpad_size` function.
-
-events
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-   
-a
-  Matrices pointed to by array ``a`` are overwritten by :math:`n_g` leading columns of the :math:`m_g \times m_g` unitary matrices :math:`Q_i`, where :math:`g` is an index of group of parameters corresponding to :math:`Q_i`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Strided API**
-
- | The routine generates the whole or parts of the :math:`m \times m` unitary matrices :math:`Q_i` of the batch of QR factorizations formed by the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
- | Usually :math:`Q_i` is determined from the QR factorization of an :math:`m \times p` matrix :math:`A_i` with :math:`m \ge p`.
- | To compute the whole matrices :math:`Q_i`, use:
- | ``ungqr_batch(queue, m, m, p, a, ...)``
- | To compute the leading :math:`p` columns of :math:`Q_i` (which form an orthonormal basis in the space spanned by the columns of :math:`A_i`):
- | ``ungqr_batch(queue, m, p, p, a, ...)``
- | To compute the matrices :math:`Q_i^k` of the QR factorizations of the leading :math:`k` columns of the matrices :math:`A_i`:
- | ``ungqr_batch(queue, m, m, k, a, ...)``
- | To compute the leading :math:`k` columns of :math:`Q_i^k` (which form an orthonormal basis in the space spanned by leading :math:`k` columns of the matrices :math:`A_i`):
- | ``ungqr_batch(queue, m, k, k, a, ...)``
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, std::int64_t stride_a, T *tau, std::int64_t stride_tau, std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices :math:`A_i` (:math:`0\le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-a
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-lda
-  Leading dimension of :math:`A_i` (:math:`m \le \text{lda}`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-tau
-  Array resulting after call to the Strided API of the :ref:`onemkl_lapack_geqrf_batch_usm` function.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-scratchpad
-  Scratchpad memory to be used by routine for storing intermediate results.
-
-scratchpad_size 
-  Size of scratchpad memory as a number of floating point elements of type ``T``. Size should not be less than the value returned by the Strided API of the :ref:`onemkl_lapack_ungqr_batch_scratchpad_size` function.
-
-events  
-  List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-  Array data is overwritten by a batch of :math:`n` leading columns of the :math:`m \times m` unitary matrices :math:`Q_i`.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/ungqr_batch_scratchpad_size.rst b/docs/domains/lapack/ungqr_batch_scratchpad_size.rst
deleted file mode 100644
index 6a427d7b6..000000000
--- a/docs/domains/lapack/ungqr_batch_scratchpad_size.rst
+++ /dev/null
@@ -1,123 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungqr_batch_scratchpad_size:
-
-ungqr_batch_scratchpad_size
-===========================
-
-Computes size of scratchpad memory required for the :ref:`onemkl_lapack_ungqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ungqr_batch_scratchpad_size`` supports the following precisions.
-
-   .. list-table:: 
-      :header-rows: 1
-
-      * -  T 
-      * -  ``std::complex<float>`` 
-      * -  ``std::complex<double>`` 
-
-**Group API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_ungqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Array of ``group_count`` :math:`m_g` parameters.
-
-n
-  Array of ``group_count`` :math:`n_g` parameters.
-
-k
- | Array of ``group_count`` :math:`k_g` parameters.
- | Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k_g \le n_g`).
- 
-lda
-  Array of leading dimensions of :math:`A_i`.
-
-group_count
-  Number of groups of parameters. Must be at least 0.
-
-group_sizes
-  Array of ``group_count`` integers. Array element with index :math:`g` specifies the number of problems to solve for each of the groups of parameters :math:`g`. So the total number of problems to solve, ``batch_size``, is a sum of all parameter group sizes.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Group API of the :ref:`onemkl_lapack_ungqr_batch` function.
-
-**Strided API**
-
-Computes the number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_ungqr_batch` function.
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size)
-    };
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-
-queue
-  Device queue where calculations will be performed.
-
-m
-  Number of rows in the matrices :math:`A_i` (:math:`0 \le m`).
-
-n
-  Number of columns in the matrices :math:`A_i` (:math:`0 \le n`).
-
-k
-  Number of elementary reflectors whose product defines the matrices :math:`Q_i` (:math:`0 \le k \le n`).
-
-lda
-  Leading dimension of :math:`A_i` (:math:`m \le \text{lda}`).
-
-stride_a
-  Stride between the beginnings of matrices :math:`A_i` inside the batch array ``a``.
-
-stride_tau
-  Stride between the beginnings of arrays :math:`\tau_i` inside the array ``tau``.
-
-batch_size
-  Number of problems in a batch.
-
-.. container:: section
-   
-  .. rubric:: Return Values
-
-Number of elements of type ``T`` the scratchpad memory should be able to hold to be passed to the Strided API of the :ref:`onemkl_lapack_ungqr_batch` function.
-
-**Parent topic:** :ref:`onemkl_lapack-like-extensions-routines`
-
diff --git a/docs/domains/lapack/ungqr_scratchpad_size.rst b/docs/domains/lapack/ungqr_scratchpad_size.rst
deleted file mode 100644
index 7fed35d15..000000000
--- a/docs/domains/lapack/ungqr_scratchpad_size.rst
+++ /dev/null
@@ -1,70 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungqr_scratchpad_size:
-
-ungqr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ungqr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``ungqr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungqr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-ungqr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_ungqr` function will be performed.
-
-m
-   The number of rows in the matrix :math:`A` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`A` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungqr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines` 
-
-
diff --git a/docs/domains/lapack/ungtr.rst b/docs/domains/lapack/ungtr.rst
deleted file mode 100644
index 764a10f96..000000000
--- a/docs/domains/lapack/ungtr.rst
+++ /dev/null
@@ -1,153 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungtr:
-
-ungtr
-=====
-
-Generates the complex unitary matrix :math:`Q` determined by
-:ref:`onemkl_lapack_hetrd`.
-
-.. container:: section
-
-  .. rubric:: Description
-      
-``ungtr`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine explicitly generates the :math:`n \times n` unitary matrix
-:math:`Q` formed by :ref:`onemkl_lapack_hetrd` when
-reducing a complex Hermitian matrix :math:`A` to tridiagonal form:
-:math:`A = QTQ^H`. Use this routine after a call to
-:ref:`onemkl_lapack_hetrd`.
-
-ungtr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void ungtr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-tau
-   The buffer ``tau`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, n-1)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungtr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the unitary matrix :math:`Q`.
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-ungtr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   The queue where the routine should be executed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   second dimension of ``a`` must be at least :math:`\max(1, n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le \text{lda})`.
-
-tau
-   The pointer to ``tau`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1, n-1)`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_ungtr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-a
-   Overwritten by the unitary matrix :math:`Q`.
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
-
diff --git a/docs/domains/lapack/ungtr_scratchpad_size.rst b/docs/domains/lapack/ungtr_scratchpad_size.rst
deleted file mode 100644
index 6b91bbbaa..000000000
--- a/docs/domains/lapack/ungtr_scratchpad_size.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_ungtr_scratchpad_size:
-
-ungtr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_ungtr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``ungtr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungtr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-ungtr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t lda) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_ungtr` function will be performed.
-
-upper_lower
-   Must be ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-n
-   The order of the matrix :math:`Q` :math:`(0 \le n)`.
-
-lda
-   The leading dimension of ``a`` :math:`(n \le lda)`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_ungtr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/unmqr.rst b/docs/domains/lapack/unmqr.rst
deleted file mode 100644
index 0515be510..000000000
--- a/docs/domains/lapack/unmqr.rst
+++ /dev/null
@@ -1,207 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmqr:
-
-unmqr
-=====
-
-Multiplies a complex matrix by the unitary matrix :math:`Q` of the QR
-factorization formed by :ref:`onemkl_lapack_geqrf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``unmqr`` supports the following precisions.
-
-    .. list-table::
-       :header-rows: 1
-
-       * -  T
-       * -  ``std::complex<float>``
-       * -  ``std::complex<double>``
-
-The routine multiplies a rectangular complex :math:`m \times n` matrix :math:`C` by
-:math:`Q` or :math:`Q^H`, where :math:`Q` is the complex unitary matrix defined
-as a product of :math:`k` elementary reflectors :math:`H(i)` of order :math:`n`:
-:math:`Q = H(1)H(2) ... H(k)` as returned by the QR factorization routine
-:ref:`onemkl_lapack_geqrf`.
-
-Depending on the parameters ``side`` and ``trans``, the routine can form one of
-the matrix products :math:`QC`, :math:`Q^HC`, :math:`CQ`, or :math:`CQ^H`
-(overwriting the result over :math:`C`).
-
-unmqr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{H}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{H}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::conjtrans``, the routine multiplies :math:`C`
-    by :math:`Q^{H}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q` 
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The buffer ``a`` as returned by :ref:`onemkl_lapack_geqrf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The buffer ``tau`` as returned by :ref:`onemkl_lapack_geqrf`.
-
-c
-    The buffer ``c`` contains the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by the
-    :ref:`onemkl_lapack_unmqr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{H}C`, :math:`CQ`, or
-    :math:`CQ^H` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-unmqr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{H}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{H}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::conjtrans``, the routine multiplies :math:`C`
-    by :math:`Q^{H}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q`
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The pointer to ``a`` as returned by :ref:`onemkl_lapack_geqrf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The pointer to ``tau`` as returned by :ref:`onemkl_lapack_geqrf`.
-
-c
-    The pointer ``c`` points to the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by
-    :ref:`onemkl_lapack_unmqr_scratchpad_size` function.
-
-events
-    List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{H}C`, :math:`CQ`, or
-    :math:`CQ^{H}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
diff --git a/docs/domains/lapack/unmqr_scratchpad_size.rst b/docs/domains/lapack/unmqr_scratchpad_size.rst
deleted file mode 100644
index a95127507..000000000
--- a/docs/domains/lapack/unmqr_scratchpad_size.rst
+++ /dev/null
@@ -1,87 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmqr_scratchpad_size:
-
-unmqr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_unmqr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``unmqr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmqr` function should be able to hold.
-Calls to this routine must specify the template parameter
-explicitly.
-
-unmqr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-         
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_unmqr` function will be performed.
-
-side
-   If ``side=oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{H}` is
-   applied to :math:`C` from the left.
-
-   If ``side=oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{H}` is
-   applied to :math:`C` from the right.
-
-trans
-   If ``trans=oneapi::mkl::transpose::nontrans``, the routine multiplies
-   :math:`C` by :math:`Q`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, the routine multiplies
-   :math:`C` by :math:`Q^H`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-ldc
-   The leading dimension of ``c``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmqr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/unmrq.rst b/docs/domains/lapack/unmrq.rst
deleted file mode 100644
index 2b3872261..000000000
--- a/docs/domains/lapack/unmrq.rst
+++ /dev/null
@@ -1,207 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmrq:
-
-unmrq
-=====
-
-Multiplies a complex matrix by the unitary matrix :math:`Q` of the RQ
-factorization formed by :ref:`onemkl_lapack_gerqf`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``unmrq`` supports the following precisions.
-
-    .. list-table::
-       :header-rows: 1
-
-       * -  T
-       * -  ``std::complex<float>``
-       * -  ``std::complex<double>``
-
-The routine multiplies a rectangular complex :math:`m \times n` matrix :math:`C` by
-:math:`Q` or :math:`Q^H`, where :math:`Q` is the complex unitary matrix defined
-as a product of :math:`k` elementary reflectors :math:`H(i)` of order :math:`n`:
-:math:`Q = H(1)^HH(2)^H ... H(k)^H` as returned by the RQ factorization routine
-:ref:`onemkl_lapack_gerqf`.
-
-Depending on the parameters ``side`` and ``trans``, the routine can form one of
-the matrix products :math:`QC`, :math:`Q^HC`, :math:`CQ`, or :math:`CQ^H`
-(overwriting the result over :math:`C`).
-
-unmrq (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{H}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{H}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::conjtrans``, the routine multiplies :math:`C`
-    by :math:`Q^{H}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q` 
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The buffer ``a`` as returned by :ref:`onemkl_lapack_gerqf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The buffer ``tau`` as returned by :ref:`onemkl_lapack_gerqf`.
-
-c
-    The buffer ``c`` contains the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by
-    :ref:`onemkl_lapack_unmrq_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{H}C`, :math:`CQ`, or
-    :math:`CQ^H` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-unmrq (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-    The queue where the routine should be executed.
-
-side
-    If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^{H}` is applied
-    to :math:`C` from the left.
-
-    If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^{H}` is
-    applied to :math:`C` from the right.
-
-trans
-    If ``trans = oneapi::mkl::transpose::nontrans``, the routine multiplies
-    :math:`C` by :math:`Q`.
-
-    If ``trans = oneapi::mkl::transpose::conjtrans``, the routine multiplies :math:`C`
-    by :math:`Q^{H}`.
-
-m
-    The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-    The number of columns in the matrix :math:`C` (:math:`0 \le n`).
-
-k
-    The number of elementary reflectors whose product defines the
-    matrix :math:`Q`
-
-    If ``side = oneapi::mkl::side::left``, :math:`0 \le k \le m`
-
-    If ``side = oneapi::mkl::side::right``, :math:`0 \le k \le n`
-
-a
-    The pointer to ``a`` as returned by :ref:`onemkl_lapack_gerqf`.
-    The second dimension of ``a`` must be at least :math:`\max(1,k)`.
-
-lda
-    The leading dimension of ``a``.
-
-tau
-    The pointer to ``tau`` as returned by :ref:`onemkl_lapack_gerqf`.
-
-c
-    The pointer ``c`` points to the matrix :math:`C`. The second dimension of
-    ``c`` must be at least :math:`\max(1,n)`.
-
-ldc
-    The leading dimension of ``c``.
-
-scratchpad_size
-    Size of scratchpad memory as a number of floating point elements of type
-    ``T``. Size should not be less than the value returned by
-    :ref:`onemkl_lapack_unmrq_scratchpad_size` function.
-
-events
-    List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-
-c
-    Overwritten by the product :math:`QC`, :math:`Q^{H}C`, :math:`CQ`, or
-    :math:`CQ^{H}` (as specified by ``side`` and ``trans``).
-
-scratchpad
-    Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
diff --git a/docs/domains/lapack/unmrq_scratchpad_size.rst b/docs/domains/lapack/unmrq_scratchpad_size.rst
deleted file mode 100644
index 8b771db75..000000000
--- a/docs/domains/lapack/unmrq_scratchpad_size.rst
+++ /dev/null
@@ -1,79 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmrq_scratchpad_size:
-
-unmrq_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_unmrq` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``unmrq_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-  
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmrq` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-unmrq_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-         
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_unmrq` function will be performed.
-
-side
-   If ``side = oneapi::mkl::side::left``, :math:`Q` or :math:`Q^H` is applied to :math:`C` from the left. If ``side = oneapi::mkl::side::right``, :math:`Q` or :math:`Q^H` is applied to :math:`C` from the right.
-
-trans
-   If ``trans=oneapi::mkl::transpose::nontrans``, the routine multiplies :math:`C` by :math:`Q`.
-
-   If ``trans=oneapi::mkl::transpose::conjtrans``, the routine multiplies :math:`C` by :math:`Q^H`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`0 \le m`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`0 \le n \le m`).
-
-k
-   The number of elementary reflectors whose product defines the matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of ``a``.
-
-ldc
-   The leading dimension of ``c``.
-
-.. container:: section
-
-  .. rubric:: Return Value
-
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmrq` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-linear-equation-routines`
-
diff --git a/docs/domains/lapack/unmtr.rst b/docs/domains/lapack/unmtr.rst
deleted file mode 100644
index 0c156b4dc..000000000
--- a/docs/domains/lapack/unmtr.rst
+++ /dev/null
@@ -1,250 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmtr:
-
-unmtr
-=====
-
-Multiplies a complex matrix by the complex unitary matrix Q
-determined by
-:ref:`onemkl_lapack_hetrd`.
-
-.. container:: section
-
-  .. rubric:: Description
-
-``unmtr`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-The routine multiplies a complex matrix :math:`C` by :math:`Q` or
-:math:`Q^{H}`, where :math:`Q` is the unitary matrix formed by
-:ref:`onemkl_lapack_hetrd`
-when reducing a complex Hermitian matrix :math:`A` to tridiagonal form:
-:math:`A = QTQ^H`. Use this routine after a call to
-:ref:`onemkl_lapack_hetrd`.
-
-Depending on the parameters ``side`` and ``trans``, the routine can
-form one of the matrix products :math:`QC`, :math:`Q^{H}C`,
-:math:`CQ`, or :math:`CQ^{H}` (overwriting the result on :math:`C`).
-
-unmtr (Buffer Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<T,1> &a, std::int64_t lda, sycl::buffer<T,1> &tau, sycl::buffer<T,1> &c, std::int64_t ldc, sycl::buffer<T,1> &scratchpad, std::int64_t scratchpad_size)
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-In the descriptions below, ``r`` denotes the order of :math:`Q`:
-
-.. container:: tablenoborder
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  :math:`r`\ =\ :math:`m` 
-          -  if ``side = side::left`` 
-        * -  :math:`r`\ =\ :math:`n` 
-          -  if ``side = side::right`` 
-
-queue
-   The queue where the routine should be executed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side=side::left``, :math:`Q` or :math:`Q^{H}` is applied
-   to :math:`C` from the left.
-
-   If ``side=side::right``, :math:`Q` or :math:`Q^{H}` is applied
-   to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or
-   ``transpose::conjtrans``.
-
-   If ``trans=transpose::nontrans``, the routine multiplies :math:`C` by
-   :math:`Q`.
-
-   If ``trans=transpose::conjtrans``, the routine multiplies :math:`C` by
-   :math:`Q^{H}`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`m \ge 0`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`n \ge 0`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The buffer ``a`` as returned by
-   :ref:`onemkl_lapack_hetrd`.
-
-lda
-   The leading dimension of ``a`` :math:`(\max(1,r) \le \text{lda})`.
-
-tau
-   The buffer ``tau`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1,r-1)`.
-
-c
-   The buffer ``c`` contains the matrix :math:`C`. The second dimension of ``c``
-   must be at least :math:`\max(1,n)`.
-
-ldc
-   The leading dimension of ``c`` :math:`(\max(1,n) \le \text{ldc})`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_unmtr_scratchpad_size` function.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-c
-   Overwritten by the product :math:`QC`, :math:`Q^{H}C`,
-   :math:`CQ`, or :math:`CQ^{H}` (as specified by ``side`` and
-   ``trans``).
-
-scratchpad
-   Buffer holding scratchpad memory to be used by routine for storing intermediate results.
-
-unmtr (USM Version)
-----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, const std::vector<sycl::event> &events = {})
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-      
-In the descriptions below, ``r`` denotes the order of :math:`Q`:
-
-.. container:: tablenoborder
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  :math:`r`\ =\ :math:`m` 
-          -  if ``side = side::left`` 
-        * -  :math:`r`\ =\ :math:`n` 
-          -  if ``side = side::right`` 
-
-queue
-   The queue where the routine should be executed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side=side::left``, :math:`Q` or :math:`Q^{H}` is applied
-   to :math:`C` from the left.
-
-   If ``side=side::right``, :math:`Q` or :math:`Q^{H}` is applied
-   to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the same
-   ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or
-   ``transpose::conjtrans``.
-
-   If ``trans=transpose::nontrans``, the routine multiplies :math:`C` by
-   :math:`Q`.
-
-   If ``trans=transpose::conjtrans``, the routine multiplies :math:`C` by
-   :math:`Q^{H}`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`m \ge 0`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`n \ge 0`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-a
-   The pointer to ``a`` as returned by
-   :ref:`onemkl_lapack_hetrd`.
-
-lda
-   The leading dimension of ``a`` :math:`(\max(1,r) \le \text{lda})`.
-
-tau
-   The pointer to ``tau`` as returned by
-   :ref:`onemkl_lapack_hetrd`. The
-   dimension of ``tau`` must be at least :math:`\max(1,r-1)`.
-
-c
-   The array ``c`` contains the matrix :math:`C`. The second dimension of ``c``
-   must be at least :math:`\max(1,n)`.
-
-ldc
-   The leading dimension of ``c`` :math:`(\max(1,n) \le \text{ldc})`.
-
-scratchpad_size
-   Size of scratchpad memory as a number of floating point elements of type ``T``.
-   Size should not be less than the value returned by :ref:`onemkl_lapack_unmtr_scratchpad_size` function.
-
-events
-   List of events to wait for before starting computation. Defaults to empty list.
-
-.. container:: section
-
-  .. rubric:: Output Parameters
-      
-c
-   Overwritten by the product :math:`QC`, :math:`Q^{H}C`,
-   :math:`CQ`, or :math:`CQ^{H}` (as specified by ``side`` and
-   ``trans``).
-
-scratchpad
-   Pointer to scratchpad memory to be used by routine for storing intermediate results.
-
-.. container:: section
-
-  .. rubric:: Return Values
-
-Output event to wait on to ensure computation is complete.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/lapack/unmtr_scratchpad_size.rst b/docs/domains/lapack/unmtr_scratchpad_size.rst
deleted file mode 100644
index 8ca39d3f0..000000000
--- a/docs/domains/lapack/unmtr_scratchpad_size.rst
+++ /dev/null
@@ -1,96 +0,0 @@
-.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation
-..
-.. SPDX-License-Identifier: CC-BY-4.0
-
-.. _onemkl_lapack_unmtr_scratchpad_size:
-
-unmtr_scratchpad_size
-=====================
-
-Computes size of scratchpad memory required for :ref:`onemkl_lapack_unmtr` function.
-
-.. container:: section
-
-  .. rubric:: Description
-         
-``unmtr_scratchpad_size`` supports the following precisions.
-
-     .. list-table:: 
-        :header-rows: 1
-
-        * -  T 
-        * -  ``std::complex<float>`` 
-        * -  ``std::complex<double>`` 
-
-Computes the number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmtr` function should be able to hold.
-Calls to this routine must specify the template parameter explicitly.
-
-unmtr_scratchpad_size
----------------------
-
-.. container:: section
-
-  .. rubric:: Syntax
-
-.. code-block:: cpp
-
-    namespace oneapi::mkl::lapack {
-      template <typename T>
-      std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) 
-    }
-
-.. container:: section
-
-  .. rubric:: Input Parameters
-
-queue
-   Device queue where calculations by :ref:`onemkl_lapack_unmtr` function will be performed.
-
-side
-   Must be either ``side::left`` or ``side::right``.
-
-   If ``side=side::left``, :math:`Q` or :math:`Q^{H}` is
-   applied to :math:`C` from the left.
-
-   If ``side=side::right``, :math:`Q` or :math:`Q^{H}` is
-   applied to :math:`C` from the right.
-
-upper_lower
-   Must be either ``uplo::upper`` or ``uplo::lower``. Uses the
-   same ``upper_lower`` as supplied to
-   :ref:`onemkl_lapack_hetrd`.
-
-trans
-   Must be either ``transpose::nontrans`` or
-   ``transpose::conjtrans``.
-
-   If ``trans=transpose::nontrans``, the routine multiplies :math:`C`
-   by :math:`Q`.
-
-   If ``trans=transpose::conjtrans``, the routine multiplies :math:`C`
-   by :math:`Q^{H}`.
-
-m
-   The number of rows in the matrix :math:`C` (:math:`m \ge 0`).
-
-n
-   The number of columns in the matrix :math:`C` (:math:`n \ge 0`).
-
-k
-   The number of elementary reflectors whose product defines the
-   matrix :math:`Q` (:math:`0 \le k \le n`).
-
-lda
-   The leading dimension of :math:`a` :math:`(\max(1,r) \le \text{lda})`.
-
-ldc
-   The leading dimension of :math:`c` :math:`(\max(1,n) \le \text{ldc})`.
-
-.. container:: section
-
-  .. rubric:: Return Value
-         
-The number of elements of type ``T`` the scratchpad memory to be passed to :ref:`onemkl_lapack_unmtr` function should be able to hold.
-
-**Parent topic:** :ref:`onemkl_lapack-singular-value-eigenvalue-routines`
-
diff --git a/docs/domains/matrix-storage.rst b/docs/domains/matrix-storage.rst
deleted file mode 100644
index 656262f55..000000000
--- a/docs/domains/matrix-storage.rst
+++ /dev/null
@@ -1,581 +0,0 @@
-.. _matrix-storage:
-
-Matrix Storage
-==============
-
-
-.. container::
-
-
-   The oneMKL BLAS and LAPACK routines for DPC++ use several matrix and
-   vector storage formats. These are the same formats used in
-   traditional Fortran BLAS/LAPACK.
-
-   .. container:: section
-
-      .. rubric:: General Matrix
-         :name: general-matrix
-         :class: sectiontitle
-
-      A general matrix ``A`` of ``m`` rows and ``n`` columns with
-      leading dimension ``lda`` is represented as a one dimensional
-      array ``a`` of size at least ``lda`` \* ``n`` if column major
-      layout is used and at least ``lda`` \* ``m`` if row major layout
-      is used.  Before entry in any BLAS function using a general
-      matrix, the leading ``m`` by ``n`` part of the array ``a`` must
-      contain the matrix ``A``. For column (respectively row) major
-      layout, the elements of each column (respectively row) are
-      contiguous in memory while the elements of each row
-      (respectively column) are at distance ``lda`` from the element
-      in the same row (respectively column) and the previous column
-      (respectively row).
-
-      Visually, the matrix
-
-      .. math::
-            
-         A = \begin{bmatrix}
-             A_{11} & A_{12} & A_{13} & \ldots & A_{1n}\\
-             A_{21} & A_{22} & A_{23} & \ldots & A_{2n}\\
-             A_{31} & A_{32} & A_{33} & \ldots & A_{3n}\\
-             \vdots & \vdots & \vdots & \ddots & \vdots\\
-             A_{m1} & A_{m2} & A_{m3} & \ldots & A_{mn}
-             \end{bmatrix}
-
-      is stored in memory as an array
-
-      - For column major layout,
-
-      .. math::
-         
-         \scriptstyle a = 
-            [\underbrace{\underbrace{A_{11},A_{21},A_{31},...,A_{m1},*,...,*}_\text{lda},
-                         \underbrace{A_{12},A_{22},A_{32},...,A_{m2},*,...,*}_\text{lda},
-                         ...,
-                         \underbrace{A_{1n},A_{2n},A_{3n},...,A_{mn},*,...,*}_\text{lda}}
-                         _\text{lda x n}]
-      
-      - For row major layout,
-
-      .. math::
-         
-         \scriptstyle a = 
-            [\underbrace{\underbrace{A_{11},A_{12},A_{13},...,A_{1n},*,...,*}_\text{lda},
-                         \underbrace{A_{21},A_{22},A_{23},...,A_{2n},*,...,*}_\text{lda},
-                         ...,
-                         \underbrace{A_{m1},A_{m2},A_{m3},...,A_{mn},*,...,*}_\text{lda}}
-                         _\text{m x lda}]
-
-   .. container:: section
-
-      .. rubric:: Triangular Matrix
-         :name: triangular-matrix
-         :class: sectiontitle
-
-      A triangular matrix ``A`` of ``n`` rows and ``n`` columns with
-      leading dimension ``lda`` is represented as a one dimensional
-      array ``a``, of a size of at least ``lda`` \* ``n``. When column
-      (respectively row) major layout is used, the elements of each
-      column (respectively row) are contiguous in memory while the
-      elements of each row (respectively column) are at distance
-      ``lda`` from the element in the same row (respectively column)
-      and the previous column (respectively row).
-
-      Before entry in any BLAS function using a triangular matrix,
-
-      -  If ``upper_lower = uplo::upper``, the leading ``n`` by ``n``
-         upper triangular part of the array ``a`` must contain the upper
-         triangular part of the matrix ``A``. The strictly lower
-         triangular part of the array ``a`` is not referenced. In other
-         words, the matrix
-
-         .. math::
-
-            A = \begin{bmatrix}
-                A_{11} & A_{12} & A_{13} & \ldots & A_{1n}\\
-                *      & A_{22} & A_{23} & \ldots & A_{2n}\\
-                *      & *      & A_{33} & \ldots & A_{3n}\\
-                \vdots & \vdots & \vdots & \ddots & \vdots\\
-                *      & *      & *      & \ldots & A_{nn}
-                \end{bmatrix}
-
-         is stored in memory as the array
-
-         - For column major layout,
-
-         .. math::
-            
-            \scriptstyle a = 
-               [\underbrace{\underbrace{A_{11},*,...,*}_\text{lda},
-                            \underbrace{A_{12},A_{22},*,...,*}_\text{lda},
-                            ...,
-                            \underbrace{A_{1n},A_{2n},A_{3n},...,A_{nn},*,...,*}_\text{lda}}
-                            _\text{lda x n}]
-
-         - For row major layout,
-
-         .. math::
-            
-            \scriptstyle a = 
-               [\underbrace{\underbrace{A_{11},A_{12},A_{13},...,A_{1n},*,...,*}_\text{lda},
-                            \underbrace{*,A_{22},A_{23},...,A_{2n},*,...,*}_\text{lda},
-                            ...,
-                            \underbrace{*,...,*,A_{nn},*,...,*}_\text{lda}}
-                            _\text{lda x n}]
-
-      -  If ``upper_lower = uplo::lower``, the leading ``n`` by ``n``
-         lower triangular part of the array ``a`` must contain the lower
-         triangular part of the matrix ``A``. The strictly upper
-         triangular part of the array ``a`` is not referenced. That is,
-         the matrix
-
-         .. math::
-
-            A = \begin{bmatrix}
-                A_{11} & *      & *      & \ldots & *     \\
-                A_{21} & A_{22} & *      & \ldots & *     \\
-                A_{31} & A_{32} & A_{33} & \ldots & *     \\
-                \vdots & \vdots & \vdots & \ddots & \vdots\\
-                A_{n1} & A_{n2} & A_{n3} & \ldots & A_{nn}
-                \end{bmatrix}
-
-         is stored in memory as the array
-
-         - For column major layout,
-      
-         .. math::
-                  
-            \scriptstyle a = 
-               [\underbrace{\underbrace{A_{11},A_{21},A_{31},...,A_{n1},*,...,*}_\text{lda},
-                            \underbrace{*,A_{22},A_{32},...,A_{n2},*,...,*}_\text{lda},
-                            ...,
-                            \underbrace{*,...,*,A_{nn},*,...,*}_\text{lda}}
-                            _\text{lda x n}]
-
-         - For row major layout,
-
-         .. math::
-                  
-            \scriptstyle a = 
-               [\underbrace{\underbrace{A_{11},*,...,*}_\text{lda},
-                            \underbrace{A_{21},A_{22},*,...,*}_\text{lda},
-                            ...,
-                            \underbrace{A_{n1},A_{n2},A_{n3},...,A_{nn},*,...,*}_\text{lda}}
-                            _\text{lda x n}]
-
-   .. container:: section
-
-      .. rubric:: Band Matrix
-         :name: band-matrix
-         :class: sectiontitle
-
-      A general band matrix ``A`` of ``m`` rows and ``n`` columns with
-      ``kl`` sub-diagonals, ``ku`` super-diagonals, and leading
-      dimension ``lda`` is represented as a one dimensional array
-      ``a`` of a size of at least ``lda`` \* ``n`` (respectively
-      ``lda`` \* ``m``) if column (respectively row) major layout is
-      used.
-
-      Before entry in any BLAS function using a general band matrix,
-      the leading (``kl`` + ``ku`` + 1) by ``n`` (respectively
-      ``m``) part of the array ``a`` must contain the matrix
-      ``A``. This matrix must be supplied column-by-column
-      (respectively row-by-row), with the main diagonal of the matrix
-      in row ``ku`` (respectively ``kl``) of the array (0-based
-      indexing), the first super-diagonal starting at position 1
-      (respectively 0) in row (``ku`` - 1) (respectively column
-      (``kl`` + 1)), the first sub-diagonal starting at position 0
-      (respectively 1) in row (``ku`` + 1) (respectively column
-      (``kl`` - 1)), and so on. Elements in the array ``a`` that do
-      not correspond to elements in the band matrix (such as the top
-      left ``ku`` by ``ku`` triangle) are not referenced.
-
-      Visually, the matrix ``A``
-
-      .. math::
-
-         A = \left[\begin{smallmatrix}
-             A_{11}     & A_{12}     & A_{13}     & \ldots & A_{1,ku+1} & *          & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-             A_{21}     & A_{22}     & A_{23}     & A_{24} & \ldots     & A_{2,ku+2} & *          & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-             A_{31}     & A_{32}     & A_{33}     & A_{34} & A_{35}     & \ldots     & A_{3,ku+3} & *          & \ldots & \ldots    & \ldots    & *         \\
-             \vdots     & A_{42}     & A_{43}     & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & *      & \ldots    & \ldots    & \vdots    \\
-             A_{kl+1,1} & \vdots     & A_{53}     & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & *         & \ldots    & \vdots    \\
-             *          & A_{kl+2,2} & \vdots     & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & \vdots    \\
-             \vdots     & *          & A_{kl+3,3} & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & *         \\
-             \vdots     & \vdots     & *          & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & A_{n-ku,n}\\
-             \vdots     & \vdots     & \vdots     & *      & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & \vdots    \\
-             \vdots     & \vdots     & \vdots     & \vdots & *          & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & A_{m-2,n} \\
-             \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & A_{m-1,n} \\
-             *          & *          & *          & \ldots & \ldots     & \ldots     & *          & A_{m,m-kl} & \ldots & A_{m,n-2} & A_{m,n-1} & A_{m,n} 
-             \end{smallmatrix}\right]
-
-
-      is stored in memory as an array
-
-      - For column major layout,
-        
-      .. math::
-               
-         \scriptscriptstyle a = 
-            [\underbrace{
-             \underbrace{\underbrace{*,...,*}_\text{ku},A_{11}, A_{21},...,A_{min(kl+1,m),1},*,...,*}_\text{lda},
-             \underbrace{\underbrace{*,...,*}_\text{ku-1},A_{max(1,2-ku),2},...,A_{min(kl+2,m),2},*,...*}_\text{lda},
-             ...,
-             \underbrace{\underbrace{*,...,*}_\text{max(0,ku-n+1)},A_{max(1,n-ku),n},...,A_{min(kl+n,m),n},*,...*}_\text{lda}
-             }_\text{lda x n}]
-
-
-      - For row major layout,
-
-      .. math::
-               
-         \scriptscriptstyle a = 
-            [\underbrace{
-             \underbrace{\underbrace{*,...,*}_\text{kl},A_{11}, A_{12},...,A_{1,min(ku+1,n)},*,...,*}_\text{lda},
-             \underbrace{\underbrace{*,...,*}_\text{kl-1},A_{2,max(1,2-kl)},...,A_{2,min(ku+2,n)},*,...*}_\text{lda},
-             ...,
-             \underbrace{\underbrace{*,...,*}_\text{max(0,kl-m+1)},A_{m,max(1,m-kl)},...,A_{m,min(ku+m,n)},*,...*}_\text{lda}
-             }_\text{lda x m}]
-
-      The following program segment transfers a band matrix from
-      conventional full matrix storage (variable ``matrix``, with
-      leading dimension ``ldm``) to band storage (variable ``a``, with
-      leading dimension ``lda``):
-
-
-      - Using matrices stored with column major layout,
-        
-      ::
-
-         for (j = 0; j < n; j++) {
-             k = ku – j;
-             for (i = max(0, j – ku); i < min(m, j + kl + 1); i++) {
-                 a[(k + i) + j * lda] = matrix[i + j * ldm];
-             }
-         }
-
-      - Using matrices stored with row major layout,
-
-      ::
-
-         for (i = 0; i < m; i++) {
-             k = kl – i;
-             for (j = max(0, i – kl); j < min(n, i + ku + 1); j++) {
-                 a[(k + j) + i * lda] = matrix[j + i * ldm];
-             }
-         }
-        
-
-   .. container:: section
-
-      .. rubric:: Triangular Band Matrix
-         :name: triangular-band-matrix
-         :class: sectiontitle
-
-      A triangular band matrix ``A`` of ``n`` rows and ``n`` columns
-      with ``k`` sub/super-diagonals and leading dimension ``lda`` is
-      represented as a one dimensional array ``a`` of size at least
-      ``lda`` \* ``n``.
-
-      Before entry in any BLAS function using a triangular band matrix,
-
-
-      - If ``upper_lower = uplo::upper``, the leading (``k`` + 1) by ``n``
-        part of the array ``a`` must contain the upper
-        triangular band part of the matrix ``A``. When using column
-        major layout, this matrix must be supplied column-by-column
-        (respectively row-by-row) with the main diagonal of the
-        matrix in row (``k``) (respectively column 0) of the array,
-        the first super-diagonal starting at position 1
-        (respectively 0) in row (``k`` - 1) (respectively column 1),
-        and so on. Elements in the array ``a`` that do not correspond
-        to elements in the triangular band matrix (such as the top
-        left ``k`` by ``k`` triangle) are not referenced.
-
-        Visually, the matrix
-
-        .. math::
-
-           A = \left[\begin{smallmatrix}
-               A_{11}     & A_{12}     & A_{13}     & \ldots & A_{1,k+1} & *          & \ldots      & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-               *          & A_{22}     & A_{23}     & A_{24} & \ldots     & A_{2,k+2} & *           & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-               \vdots     & *          & A_{33}     & A_{34} & A_{35}     & \ldots     & A_{3,k+3}  & *          & \ldots & \ldots    & \ldots    & *         \\
-               \vdots     & \vdots     & *          & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & *      & \ldots    & \ldots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \ddots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & *         & \ldots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \vdots & \ddots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \ddots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & *         \\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \vdots     & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & A_{n-k,n}\\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \vdots     & \vdots     & \ddots     & \ddots & \ddots    & \ddots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \vdots     & \vdots     & \vdots     & \ddots & \ddots    & \ddots    & A_{n-2,n} \\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \vdots     & \vdots     & \vdots     & \vdots & \ddots    & \ddots    & A_{n-1,n} \\
-               *          & *          & *          & \ldots & \ldots     & \ldots     & \ldots     & \ldots     & \ldots & \ldots    & *         & A_{n,n} 
-               \end{smallmatrix}\right]
-
-        is stored as an array
-
-      .. container:: fignone
-                            
-         - For column major layout,
-                
-            .. math::
-                     
-               \scriptstyle a = 
-                  [\underbrace{
-                   \underbrace{\underbrace{*,...,*}_\text{ku},A_{11},*,...,*}_\text{lda},
-                   \underbrace{\underbrace{*,...,*}_\text{ku-1},A_{max(1,2-k),2},...,A_{2,2},*,...*}_\text{lda},
-                   ...,
-                   \underbrace{\underbrace{*,...,*}_\text{max(0,k-n+1)},A_{max(1,n-k),n},...,A_{n,n},*,...*}_\text{lda}
-                   }_\text{lda x n}]
-
-
-         - For row major layout,
-            
-            .. math::
-                     
-               \scriptstyle a = 
-                  [\underbrace{
-                   \underbrace{A_{11},A_{21},...,A_{min(k+1,n),1},*,...,*}_\text{lda},
-                   \underbrace{A_{2,2},...,A_{min(k+2,n),2},*,...,*}_\text{lda},
-                   ...,
-                   \underbrace{A_{n,n},*,...*}_\text{lda}
-                   }_\text{lda x n}]
-
-         The following program segment transfers a band matrix from
-         conventional full matrix storage (variable ``matrix``, with
-         leading dimension ``ldm``) to band storage (variable ``a``,
-         with leading dimension ``lda``):
-
-         - Using matrices stored with column major layout,
-
-         ::
-
-            for (j = 0; j < n; j++) {
-                m = k – j;
-                for (i = max(0, j – k); i <= j; i++) {
-                    a[(m + i) + j * lda] = matrix[i + j * ldm];
-                }
-            }
-
-         - Using matrices stored with column major layout,
-
-         ::
-
-            for (i = 0; i < n; i++) {
-                m = –i;
-                for (j = i; j < min(n, i + k + 1); j++) {
-                    a[(m + j) + i * lda] = matrix[j + i * ldm];
-                }
-            }
-
-      - If ``upper_lower = uplo::lower``, the leading (``k`` + 1) by ``n``
-        part of the array ``a`` must contain the upper triangular
-        band part of the matrix ``A``. This matrix must be supplied
-        column-by-column with the main diagonal of the matrix in row 0
-        of the array, the first sub-diagonal starting at position 0 in
-        row 1, and so on. Elements in the array ``a`` that do not
-        correspond to elements in the triangular band matrix (such as
-        the bottom right ``k`` by ``k`` triangle) are not referenced.
-
-        That is, the matrix
-
-        .. math::
-
-           A = \left[\begin{smallmatrix}
-               A_{11}     & *          & \ldots     & \ldots & \ldots     & \ldots    & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-               A_{21}     & A_{22}     & *          & \ldots & \ldots     & \ldots    & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-               A_{31}     & A_{32}     & A_{33}     & *      & \ldots     & \ldots    & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & *         \\
-               \vdots     & A_{42}     & A_{43}     & \ddots & \ddots     & \ldots    & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & \vdots    \\
-               A_{k+1,1}  & \vdots     & A_{53}     & \ddots & \ddots     & \ddots    & \ldots     & \ldots     & \ldots & \ldots    & \ldots    & \vdots    \\
-               *          & A_{k+2,2}  & \vdots     & \ddots & \ddots     & \ddots    & \ddots     & \ldots     & \ldots & \ldots    & \ldots    & \vdots    \\
-               \vdots     & *          & A_{k+3,3}  & \ddots & \ddots     & \ddots    & \ddots     & \ddots     & \ldots & \ldots    & \ldots    & \vdots    \\
-               \vdots     & \vdots     & *          & \ddots & \ddots     & \ddots    & \ddots     & \ddots     & \ddots & \ldots    & \ldots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & *      & \ddots     & \ddots    & \ddots     & \ddots     & \ddots & \ddots    & \ldots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \vdots & *          & \ddots    & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & \vdots    \\
-               \vdots     & \vdots     & \vdots     & \vdots & \vdots     & \ddots    & \ddots     & \ddots     & \ddots & \ddots    & \ddots    & *         \\
-               *          & *          & *          & \ldots & \ldots     & \ldots    & *          & A_{n,n-k}  & \ldots & A_{n,n-2} & A_{n,n-1} & A_{n,n} 
-               \end{smallmatrix}\right]
-
-
-        is stored as the array
-
-
-      .. container:: fignone
-
-         - For column major layout,
-
-           .. math::
-                    
-              \scriptstyle a = 
-                 [\underbrace{
-                  \underbrace{A_{11},A_{21},...,A_{min(k+1,n),1},*,...,*}_\text{lda},
-                  \underbrace{A_{2,2},...,A_{min(k+2,n),2},*,...,*}_\text{lda},
-                  ...,
-                  \underbrace{A_{n,n},*,...*}_\text{lda}
-                  }_\text{lda x n}]
-
-         - For row major layout,
-        
-            .. math::
-                     
-               \scriptstyle a = 
-                  [\underbrace{
-                   \underbrace{\underbrace{*,...,*}_\text{k},A_{11},*,...,*}_\text{lda},
-                   \underbrace{\underbrace{*,...,*}_\text{k-1},A_{max(1,2-k),2},...,A_{2,2},*,...*}_\text{lda},
-                   ...,
-                   \underbrace{\underbrace{*,...,*}_\text{max(0,k-n+1)},A_{max(1,n-k),n},...,A_{n,n},*,...*}_\text{lda}
-                   }_\text{lda x n}]
-
-
-         The following program segment transfers a band matrix from
-         conventional full matrix storage (variable ``matrix``, with
-         leading dimension ``ldm``) to band storage (variable ``a``,
-         with leading dimension ``lda``):
-
-         - Using matrices stored with column major layout,
-           
-         ::
-
-            for (j = 0; j < n; j++) {
-                m = –j;
-                for (i = j; i < min(n, j + k + 1); i++) {
-                    a[(m + i) + j * lda] = matrix[i + j * ldm];
-                }
-            }
-
-         - Using matrices stored with row major layout,
-
-         ::
-
-            for (i = 0; i < n; i++) {
-                m = k – i;
-                for (j = max(0, i – k); j <= i; j++) {
-                    a[(m + j) + i * lda] = matrix[j + i * ldm];
-                }
-            }
-
-
-   .. container:: section
-
-      .. rubric:: Packed Triangular Matrix
-         :name: packed-triangular-matrix
-         :class: sectiontitle
-
-      A triangular matrix ``A`` of ``n`` rows and ``n`` columns is
-      represented in packed format as a one dimensional array ``a`` of
-      size at least (``n``\ \*(``n`` + 1))/2. All elements in the upper
-      or lower part of the matrix ``A`` are stored contiguously in the
-      array ``a``.
-
-      Before entry in any BLAS function using a triangular packed
-      matrix,
-
-      - If ``upper_lower = uplo::upper``, if column (respectively row)
-        major layout is used, the first (``n``\ \*(``n`` + 1))/2
-        elements in the array ``a`` must contain the upper triangular
-        part of the matrix ``A`` packed sequentially, column by column
-        (respectively row by row) so that ``a``\ [0] contains ``A``\
-        :sub:`11`, ``a``\ [1] and ``a``\ [2] contain ``A``\ :sub:`12`
-        and ``A``\ :sub:`22` (respectively ``A``\ :sub:`13`)
-        respectively, and so on. Hence, the matrix
-
-        .. math::
-              
-           A = \begin{bmatrix}
-               A_{11} & A_{12} & A_{13} & \ldots & A_{1n}\\
-               *      & A_{22} & A_{23} & \ldots & A_{2n}\\
-               *      & *      & A_{33} & \ldots & A_{3n}\\
-               \vdots & \vdots & \vdots & \ddots & \vdots\\
-               *      & *      & *      & \ldots & A_{nn}
-               \end{bmatrix}
-
-        is stored as the array
-
-        - For column major layout,
-
-          .. math::
-             
-             \scriptstyle a = [A_{11},A_{12},A_{22},A_{13},A_{23},A_{33},...,A_{(n-1),n},A_{nn}]
-
-        - For row major layout,
-
-          .. math::
-             
-             \scriptstyle a = [A_{11},A_{12},A_{13},...,A_{1n},
-                  A_{22},A_{23},...,A_{2n},...,
-                  A_{(n-1),(n-1)},A_{(n-1),n},A_{nn}]
-
-      - If ``upper_lower = uplo::lower``, if column (respectively row)
-        major layout is used, the first (``n``\ \*(``n`` + 1))/2
-        elements in the array ``a`` must contain the lower triangular
-        part of the matrix ``A`` packed sequentially, column by column
-        (row by row) so that ``a``\ [0] contains ``A``\ :sub:`11`,
-        ``a``\ [1] and ``a``\ [2] contain ``A``\ :sub:`21` and ``A``\
-        :sub:`31` (respectively ``A``\ :sub:`22`) respectively, and so
-        on. The matrix
-
-         .. math::
-               
-            A = \begin{bmatrix}
-                A_{11} & *      & *      & \ldots & *     \\
-                A_{21} & A_{22} & *      & \ldots & *     \\
-                A_{31} & A_{32} & A_{33} & \ldots & *     \\
-                \vdots & \vdots & \vdots & \ddots & \vdots\\
-                A_{n1} & A_{n2} & A_{n3} & \ldots & A_{nn}
-                \end{bmatrix}
-
-         is stored as the array
-
-         - For column major layout,
-
-          .. math::
-             
-             \scriptstyle a = [A_{11},A_{21},A_{31},...,A_{n1},
-                  A_{22},A_{32},...,A_{n2},...,
-                  A_{(n-1),(n-1)},A_{n,(n-1)},A_{nn}]
-
-         - For row major layout,
-
-          .. math::
-             
-             \scriptstyle a = [A_{11},A_{21},A_{22},A_{31},A_{32},A_{33},...,A_{n,(n-1)},A_{nn}]
-
-   .. container:: section
-
-      .. rubric:: Vector
-         :name: vector
-         :class: sectiontitle
-
-      A vector ``X`` of ``n`` elements with increment ``incx`` is
-      represented as a one dimensional array ``x`` of size at least (1 +
-      (``n`` - 1) \* abs(``incx``)).
-
-      Visually, the vector
-
-      .. math::
-            
-            X = (X_{1},X_{2}, X_{3},...,X_{n})
-
-      is stored in memory as an array
-
-
-      .. math::
-               
-         \scriptstyle x = [\underbrace{
-             \underbrace{X_{1},*,...,*}_\text{incx},
-             \underbrace{X_{2},*,...,*}_\text{incx},
-             ...,
-             \underbrace{X_{n-1},*,...,*}_\text{incx},X_{n}
-             }_\text{1 + (n-1) x incx}] \quad if \:incx \:> \:0 
-
-      .. math::
-               
-         \scriptstyle x = [\underbrace{
-             \underbrace{X_{n},*,...,*}_\text{|incx|},
-             \underbrace{X_{n-1},*,...,*}_\text{|incx|},
-             ...,
-             \underbrace{X_{2},*,...,*}_\text{|incx|},X_{1}
-             }_\text{1 + (1-n) x incx}] \quad if \:incx \:< \:0 
-
-
-
-
diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst
index 07d90359a..915151a36 100644
--- a/docs/domains/sparse_linear_algebra.rst
+++ b/docs/domains/sparse_linear_algebra.rst
@@ -1,10 +1,10 @@
-.. _onemkl_sparse_linear_algebra:
+.. _onemath_sparse_linear_algebra:
 
 Sparse Linear Algebra
 =====================
 
 See the latest specification for the sparse domain `here
-<https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/domains/spblas/spblas>`_.
+<https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/spblas/spblas>`_.
 
 This page documents implementation specific or backend specific details of the
 sparse domain.
@@ -17,25 +17,25 @@ Currently known limitations:
 - All operations' algorithms except ``no_optimize_alg`` map to the default
   algorithm.
 - The required external workspace size is always 0 bytes.
-- ``oneapi::mkl::sparse::set_csr_data`` and
-  ``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle
+- ``oneapi::math::sparse::set_csr_data`` and
+  ``oneapi::math::sparse::set_coo_data`` functions cannot be used on a handle
   that has already been used for an operation or its optimize function. Doing so
-  will throw a ``oneapi::mkl::unimplemented`` exception.
-- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and
+  will throw a ``oneapi::math::unimplemented`` exception.
+- Using ``spsv`` with the ``oneapi::math::sparse::spsv_alg::no_optimize_alg`` and
   a sparse matrix that does not have the
-  ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw a
-  ``oneapi::mkl::unimplemented`` exception.
+  ``oneapi::math::sparse::matrix_property::sorted`` property will throw a
+  ``oneapi::math::unimplemented`` exception.
 - Using ``spmm`` on Intel GPU with a sparse matrix that is
-  ``oneapi::mkl::transpose::conjtrans`` and has the
-  ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw a
-  ``oneapi::mkl::unimplemented`` exception.
+  ``oneapi::math::transpose::conjtrans`` and has the
+  ``oneapi::math::sparse::matrix_property::symmetric`` property will throw a
+  ``oneapi::math::unimplemented`` exception.
 - Using ``spmv`` with a sparse matrix that is
-  ``oneapi::mkl::transpose::conjtrans`` with a ``type_view``
+  ``oneapi::math::transpose::conjtrans`` with a ``type_view``
   ``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw a
-  ``oneapi::mkl::unimplemented`` exception.
+  ``oneapi::math::unimplemented`` exception.
 - Using ``spsv`` on Intel GPU with a sparse matrix that is
-  ``oneapi::mkl::transpose::conjtrans`` and will throw a
-  ``oneapi::mkl::unimplemented`` exception.
+  ``oneapi::math::transpose::conjtrans`` will throw a
+  ``oneapi::math::unimplemented`` exception.
 - Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
   synchronizations and copies to the host.
 
@@ -50,20 +50,20 @@ Currently known limitations:
   <https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_. Sparse
   operations using matrices with the COO format without the property
   ``matrix_property::sorted_by_rows`` or ``matrix_property::sorted`` will throw
-  a ``oneapi::mkl::unimplemented`` exception.
+  a ``oneapi::math::unimplemented`` exception.
 - Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3`` and an ``opA`` other
   than ``transpose::nontrans`` or an ``opB`` ``transpose::conjtrans`` will throw
-  a ``oneapi::mkl::unimplemented`` exception.
+  a ``oneapi::math::unimplemented`` exception.
 - Using ``spmm`` with the algorithm ``spmm_alg::csr_alg3``,
   ``opB=transpose::trans`` and real fp64 precision will throw a
-  ``oneapi::mkl::unimplemented`` exception. This configuration can fail as of
+  ``oneapi::math::unimplemented`` exception. This configuration can fail as of
   CUDA 12.6.2, see the related issue
   `here<https://forums.developer.nvidia.com/t/cusparse-spmm-sample-failing-with-misaligned-address/311022>`_.
 - Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
-  throw a ``oneapi::mkl::unimplemented`` exception.
+  throw a ``oneapi::math::unimplemented`` exception.
 - Using ``spsv`` with the algorithm ``spsv_alg::no_optimize_alg`` may still
   perform some mandatory preprocessing.
-- oneMKL Interface does not provide a way to use non-default algorithms without
+- oneMath does not provide a way to use non-default algorithms without
   calling preprocess functions such as ``cusparseSpMM_preprocess`` or
   ``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.
 
@@ -71,9 +71,9 @@ Currently known limitations:
 Operation algorithms mapping
 ----------------------------
 
-The following tables describe how a oneMKL SYCL Interface algorithm maps to the
-backend's algorithms. Refer to the backend's documentation for a more detailed
-explanation of the algorithms.
+The following tables describe how a oneMath algorithm maps to the backend's
+algorithms. Refer to the backend's documentation for a more detailed explanation
+of the algorithms.
 
 Backends with no equivalent algorithms will fallback to the backend's default
 behavior.
diff --git a/docs/index.rst b/docs/index.rst
index e17eeff6c..7a906f1c9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,11 +1,11 @@
 ..
   Copyright 2020-2024 Intel Corporation
 
-.. _onemkl:
+.. _onemath:
 
-*****************
-oneMKL Interfaces
-*****************
+*******
+oneMath
+*******
 
 Contents
 ========
@@ -24,14 +24,12 @@ Contents
    building_the_project_with_dpcpp.rst
    building_the_project_with_adaptivecpp.rst
    building_and_running_tests.rst
-   using_onemkl_with_cmake.rst
+   using_onemath_with_cmake.rst
 
 .. toctree::
    :caption: Developer Reference
    :maxdepth: 2
    :includehidden:
 
-   onemkl-datatypes.rst
-   domains/dense_linear_algebra.rst
    domains/sparse_linear_algebra.rst
    create_new_backend.rst
diff --git a/docs/introduction.rst b/docs/introduction.rst
index 6bf86fc56..693e85f61 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -3,7 +3,7 @@
 Introduction
 ============
 
-oneMKL Interfaces is an open-source implementation of oneMKL Data Parallel C++
-(DPC++) interfaces according to the `oneMKL specification <https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/>`_
-that can work with multiple devices (backends) using device-specific
-libraries underneath.
+oneMath is an open-source implementation of the `oneMath specification
+<https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/>`_
+that can work with multiple devices using multiple libraries (backends)
+underneath.
diff --git a/docs/onemkl-datatypes.rst b/docs/onemkl-datatypes.rst
deleted file mode 100644
index 33a08e834..000000000
--- a/docs/onemkl-datatypes.rst
+++ /dev/null
@@ -1,140 +0,0 @@
-.. _onemkl_datatypes:
-
-oneMKL Defined Datatypes
-========================
-
-
-oneMKL BLAS and LAPACK for Data Parallel C++ (DPC++) introduces
-several new enumeration data types, which are type-safe versions of
-the traditional Fortran characters in BLAS and LAPACK. They are
-declared in ``types.hpp``, which is included automatically when
-you include ``mkl.hpp``. Like all oneMKL DPC++ functionality, they belong to the namespace ``oneapi::mkl``.
-
-
-Each enumeration value comes with two names: A single-character name
-(the traditional BLAS/LAPACK character) and a longer, descriptive
-name. The two names are exactly equivalent and may be used
-interchangeably.
-
-
-transpose
----------
-
-The ``transpose`` type specifies whether an input matrix should be
-transposed and/or conjugated. It can take the following values:
-
-
-.. list-table::
-   :header-rows: 1
-
-   * -  Short Name
-     -  Long Name
-     -  Description
-   * -  ``transpose::N``
-     -  ``transpose::nontrans``
-     -  Do not transpose or conjugate the matrix.
-   * -  ``transpose::T``
-     -  ``transpose::trans``
-     -  Transpose the matrix.
-   * -  ``transpose::C``
-     -  ``transpose::conjtrans``
-     -  Perform Hermitian transpose (transpose and conjugate). Only applicable to complex matrices.
-
-
-
-
-uplo
-----
-
-The ``uplo`` type specifies whether the lower or upper triangle of a riangular, symmetric, or Hermitian matrix should be accessed.
-
-It can take the following values:
-
-
-.. list-table::
-   :header-rows: 1
-
-   * -  Short Name
-     -  Long Name
-     -  Description
-   * -  ``uplo::U``
-     -  ``uplo::upper``
-     -  Access the upper triangle of the matrix.
-   * -  ``uplo::L``
-     -  ``uplo::lower``
-     -  Access the lower triangle of the matrix.
-
-
-
-
-In both cases, elements that are not in the selected triangle are not accessed or updated.
-
-
-diag
-----
-
-
-The ``diag`` type specifies the values on the diagonal of a triangular matrix. It can take the following values:
-
-
-.. list-table::
-   :header-rows: 1
-
-   * -  Short Name
-     -  Long Name
-     -  Description
-   * -  ``diag::N``
-     -  ``diag::nonunit``
-     -  The matrix is not unit triangular. The diagonal entries are stored with the matrix data.
-   * -  ``diag::U``
-     -  ``diag::unit``
-     -  The matrix is unit triangular (the diagonal entries are all 1s). The diagonal entries in the matrix data are not accessed.
-
-
-
-
-side
-----
-
-
-The ``side`` type specifies the order of matrix multiplication when one matrix has a special form (triangular, symmetric, or Hermitian):
-
-
-.. list-table::
-   :header-rows: 1
-
-   * -  Short Name
-     -  Long Name
-     -  Description
-   * -  ``side::L``
-     -  ``side::left``
-     -  The special form matrix is on the left in the multiplication.
-   * -  ``side::R``
-     -  ``side::right``
-     -  The special form matrix is on the right in the multiplication.
-
-
-offset
-------
-
-
-The ``offset`` type specifies whether the offset to apply to an output matrix is a fix offset, column offset or row offset. It can take the following values
-
-
-.. list-table::
-   :header-rows: 1
-
-   * -  Short Name
-     -  Long Name
-     -  Description
-   * -  ``offset::F``
-     -  ``offset::fix``
-     -  The offset to apply to the output matrix is fix, all the inputs in the ``C_offset`` matrix has the same value given by the first element in the ``co`` array.
-   * -  ``offset::C``
-     -  ``offset::column``
-     -  The offset to apply to the output matrix is a column offset, that is to say all the columns in the ``C_offset`` matrix are the same and given by the elements in the ``co`` array.
-   * -  ``offset::R``
-     -  ``offset::row``
-     -  The offset to apply to the output matrix is a row offset, that is to say all the rows in the ``C_offset`` matrix are the same and given by the elements in the ``co`` array.
-
-**Parent topic:** :ref:`onemkl`
diff --git a/docs/using_onemath_with_cmake.rst b/docs/using_onemath_with_cmake.rst
new file mode 100644
index 000000000..8dac6e092
--- /dev/null
+++ b/docs/using_onemath_with_cmake.rst
@@ -0,0 +1,90 @@
+.. _using_onemath_library_with_cmake:
+
+Using oneMath in your project with CMake
+========================================
+
+The CMake build tool can help you use oneMath in your own project. Instead of
+manually linking and including directories, you can use the CMake targets
+exported by the oneMath project. You can use oneMath in one of two forms, with
+the target names depending on the approach taken:
+
+* you can use a previously installed copy, either from a binary distribution or
+  built from source. This can be imported using CMake's ``find_package``
+  command. See the section `using_from_installed_binary`_.
+* or you can have CMake automatically download and build oneMath as part of the
+  build process using CMake's FetchContent_ functionality.
+  See the section `using_with_fetchcontent`_.
+
+
+.. _using_from_installed_binary:
+
+Using an installed oneMath
+##########################
+
+If oneMath has been previously installed, either by building from source or as a
+distributed binary, it can be consumed from CMake using
+``find_package(oneMath REQUIRED)``. The compiler used for the target library or
+application should match that used to build oneMath.
+
+For example:
+
+.. code-block:: cmake
+
+    find_package(oneMath REQUIRED)
+    target_link_libraries(myTarget PRIVATE ONEMATH::onemath)
+
+Different targets can be used depending on which parts of oneMath are required.
+To link against the entire library, the ``ONEMATH::onemath`` target should be used.
+For specific domains, ``ONEMATH::onemath_<domain>`` should be used.
+And for specific backends, ``ONEMATH::onemath_<domain>_<backend>`` should be used.
+
+When using a binary, it may be useful to know the backends that were enabled
+during the build. To check for the existence of backends, CMake's ``if(TARGET
+<target>)`` construct can be used. For example, with the ``cufft`` backend:
+
+.. code-block:: cmake
+
+    if(TARGET ONEMATH::onemath_dft_cufft)
+        target_link_libraries(myTarget PRIVATE ONEMATH::onemath_dft_cufft)
+    else()
+        message(FATAL_ERROR "oneMath was not built with CuFFT backend")
+    endif()
+
+.. _using_with_fetchcontent:
+
+Using CMake's FetchContent
+##########################
+
+
+The FetchContent_ functionality of CMake can be used to download, build and
+install oneMath as part of the build.
+
+For example:
+
+.. code-block:: cmake
+
+    include(FetchContent)
+    set(BUILD_FUNCTIONAL_TESTS False)
+    set(BUILD_EXAMPLES False)
+    set(ENABLE_<BACKEND_NAME>_BACKEND True)
+    FetchContent_Declare(
+            onemath_library
+            GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
+            GIT_TAG develop
+    )
+    FetchContent_MakeAvailable(onemath_library)
+
+    target_link_libraries(myTarget PRIVATE onemath)
+
+The build parameters should be appropriately set before
+``FetchContent_Declare``. See :ref:`building_the_project_with_dpcpp` or
+:ref:`building_the_project_with_adaptivecpp`.
+
+To link against the main library with run-time dispatching, use the target
+``onemath``. To link against particular domains, use the target
+``onemath_<domain>``. For example, ``onemath_blas`` or ``onemath_dft``. To link
+against particular backends (as required for static dispatch of oneAPI calls to
+a particular backend), use the target ``onemath_<domain>_<backend>``. For
+example, ``onemath_dft_cufft``.
+
+.. _FetchContent: https://cmake.org/cmake/help/latest/module/FetchContent.html
diff --git a/docs/using_onemkl_with_cmake.rst b/docs/using_onemkl_with_cmake.rst
deleted file mode 100644
index 03f828bf4..000000000
--- a/docs/using_onemkl_with_cmake.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-.. _using_onemkl_interface_library_with_cmake:
-
-Using the oneMKL Interfaces in your project with CMake
-=============================================================
-
-The CMake build tool can help you use oneMKL Interfaces in your own project.
-Instead of manually linking and including directories, you can use the CMake targets
-exported by the oneMKL Interfaces project. You can use oneMKL in one of two
-forms, with the target names depending on the approach taken: 
-
-* you can use a previously installed copy, either from a binary distribution or
-  built from source. This can be imported using CMake's ``find_package``
-  command. See the section `using_from_installed_binary`_.
-* or you can have CMake automatically download and build oneMKL as part of the
-  build process using CMake's FetchContent_ functionality.
-  See the section `using_with_fetchcontent`_.
-
-
-.. _using_from_installed_binary:
-
-Using an installed oneMKL Interfaces
-####################################
-
-If the oneMKL Interfaces have been previously installed, either by building from
-source or as a distributed binary, they can be consumed using CMake using
-``find_package(oneMKL REQUIRED)``. The compiler used for the target library or
-application should match that used to build oneMKL Interfaces.
-
-For example:
-
-.. code-block:: cmake
-
-    find_package(oneMKL REQUIRED)
-    target_link_libraries(myTarget PRIVATE MKL::onemkl)
-
-Different targets can be used depending on the requirements of oneMKL. 
-To link against the entire library, the ``MKL::onemkl`` target should be used.
-For specific domains, ``MKL::onemkl_<domain>`` should be used.
-And for specific backends, ``MKL::onemkl_<domain>_<backend>`` should be used.
-
-When using a binary, it may be useful to know the backends that were enabled
-during the build. To check for the existence of backends, CMake's ``if(TARGET
-<target>)`` construct can be used. For example, with the ``cufft`` backend:
-
-.. code-block:: cmake
-
-    if(TARGET MKL::onemkl_dft_cufft)
-        target_link_libraries(myTarget PRIVATE MKL::onemkl_dft_cufft)
-    else()
-        message(FATAL_ERROR "oneMKL Interfaces was not built with CuFFT backend")
-    endif()
-
-.. _using_with_fetchcontent:
-
-Using CMake's FetchContent
-##########################
-
-
-The FetchContent_ functionality of CMake can be used to download, build and
-install oneMKL Interfaces as part of the build.
-
-For example:
-
-.. code-block:: cmake
-
-    include(FetchContent)
-    set(BUILD_FUNCTIONAL_TESTS False)
-    set(BUILD_EXAMPLES False)
-    set(ENABLE_<BACKEND_NAME>_BACKEND True)
-    FetchContent_Declare(
-            onemkl_interface_library
-            GIT_REPOSITORY https://github.com/oneapi-src/oneMKL.git
-            GIT_TAG develop
-    )
-    FetchContent_MakeAvailable(onemkl_interface_library)
-
-    target_link_libraries(myTarget PRIVATE onemkl)
-
-The build parameters should be appropriately set before
-``FetchContent_Declare``. See :ref:`building_the_project_with_dpcpp` or
-:ref:`building_the_project_with_adaptivecpp`.
-
-To link against the main library with run-time dispatching, use the target
-``onemkl``. To link against particular domains, use the target
-``onemkl_<domain>``. For example, ``onemkl_blas`` or ``onemkl_dft``. To link
-against particular backends (as required for static dispatch of oneAPI calls to
-a particular backend), use the target ``onemkl_<domain>_<backend>``. For
-example, ``onemkl_dft_cufft``.
-
-.. _FetchContent: https://cmake.org/cmake/help/latest/module/FetchContent.html
diff --git a/examples/README.md b/examples/README.md
index 45a100131..9c48a5474 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,20 +1,18 @@
-# oneAPI Math Kernel Library (oneMKL) Interfaces Examples 
-oneAPI Math Kernel Library (oneMKL) Interfaces offers examples with the following routines: 
-- blas: level3/gemm_usm  
-- rng: uniform_usm  
+# oneMath Examples
+oneMath offers examples with the following routines:
+- blas: level3/gemm_usm
+- rng: uniform_usm
 - lapack: getrs_usm
 - dft: complex_fwd_usm, real_fwd_usm
 - sparse_blas: sparse_spmv_usm
 
 Each routine has one run-time dispatching example and one compile-time dispatching example (which uses both mklcpu and cuda backends), located in `example/<$domain>/run_time_dispatching` and `example/<$domain>/compile_time_dispatching` subfolders, respectively.
 
-To build examples, use cmake build option `-DBUILD_EXAMPLES=true`.  
+To build examples, use cmake build option `-DBUILD_EXAMPLES=true`.
 Compile_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and cuda backend is enabled, because the compile-time dispatching example runs on both mklcpu and cuda backends.
 Run_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and `-DBUILD_SHARED_LIBS=true`.
 
-The example executable naming convention follows `example_<$domain>_<$routine>_<$backend>` for compile-time dispatching examples 
-  or `example_<$domain>_<$routine>` for run-time dispatching examples. 
-  E.g. `example_blas_gemm_usm_mklcpu_cublas `  `example_blas_gemm_usm`
+The example executable naming convention follows `example_<$domain>_<$routine>_<$backend>` for compile-time dispatching examples or `example_<$domain>_<$routine>` for run-time dispatching examples. E.g. `example_blas_gemm_usm_mklcpu_cublas` or `example_blas_gemm_usm`.
 
 ## Example outputs (blas, rng, lapack, dft, sparse_blas)
   
@@ -479,7 +477,7 @@ Device name is: Intel(R) UHD Graphics 750
 Running with single precision real data type:
 DFT example run_time dispatch
 Unsupported Configuration:
-	oneMKL: dft/backends/portfft/commit: function is not implemented portFFT only supports complex to complex transforms
+	oneMath: dft/backends/portfft/commit: function is not implemented REAL domain is unsupported
 ```
 
 ## sparse_blas
diff --git a/examples/blas/compile_time_dispatching/level3/CMakeLists.txt b/examples/blas/compile_time_dispatching/level3/CMakeLists.txt
index 294e000de..eb33a7497 100644
--- a/examples/blas/compile_time_dispatching/level3/CMakeLists.txt
+++ b/examples/blas/compile_time_dispatching/level3/CMakeLists.txt
@@ -32,13 +32,13 @@ foreach(blas_ct_source ${BLAS_CT_SOURCES})
   )
 
   if(domain STREQUAL "blas" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUBLAS_BACKEND)
-    add_dependencies(example_${domain}_${blas_ct_source} onemkl_${domain}_mklcpu onemkl_${domain}_cublas)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cublas)
+    add_dependencies(example_${domain}_${blas_ct_source} onemath_${domain}_mklcpu onemath_${domain}_cublas)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_mklcpu onemath_${domain}_cublas)
   endif()
 
   target_link_libraries(example_${domain}_${blas_ct_source} PUBLIC
-      ${ONEMKL_LIBRARIES_${domain}}
-      ONEMKL::SYCL::SYCL
+      ${ONEMATH_LIBRARIES_${domain}}
+      ONEMATH::SYCL::SYCL
   )
 
   # Register example as ctest
diff --git a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp
index c6beb44da..295cc4dea 100644
--- a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp
+++ b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp
@@ -20,13 +20,13 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPCPP API oneapi::mkl::blas::gemm
+*       This example demonstrates use of DPCPP API oneapi::math::blas::gemm
 *       using unified shared memory to perform General Matrix-Matrix
 *       Multiplication on a INTEL CPU SYCL device and an NVIDIA GPU SYCL device
 *
 *       C = alpha * op(A) * op(B) + beta * C
 *
-*       where op() is defined by one of oneapi::mkl::transpose::{nontrans,trans,conjtrans}
+*       where op() is defined by one of oneapi::math::transpose::{nontrans,trans,conjtrans}
 *
 *
 *       This example demonstrates only single precision (float) data type for
@@ -41,13 +41,13 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 // local includes
 #include "example_helper.hpp"
@@ -67,8 +67,8 @@ void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev)
     //
     // C = alpha * op(A) * op(B)  + beta * C
     //
-    oneapi::mkl::transpose transA = oneapi::mkl::transpose::trans;
-    oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
+    oneapi::math::transpose transA = oneapi::math::transpose::trans;
+    oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
 
     // matrix data sizes
     int m = 45;
@@ -79,8 +79,8 @@ void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev)
     int ldA = 103;
     int ldB = 105;
     int ldC = 106;
-    int sizea = (transA == oneapi::mkl::transpose::nontrans) ? ldA * k : ldA * m;
-    int sizeb = (transB == oneapi::mkl::transpose::nontrans) ? ldB * n : ldB * k;
+    int sizea = (transA == oneapi::math::transpose::nontrans) ? ldA * k : ldA * m;
+    int sizeb = (transB == oneapi::math::transpose::nontrans) ? ldB * n : ldB * k;
     int sizec = ldC * n;
 
     // set scalar fp values
@@ -131,7 +131,7 @@ void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev)
 
     rand_matrix(A, transA, m, k, ldA);
     rand_matrix(B, transB, k, n, ldB);
-    rand_matrix(C, oneapi::mkl::transpose::nontrans, m, n, ldC);
+    rand_matrix(C, oneapi::math::transpose::nontrans, m, n, ldC);
 
     //
     // Preparation on CPU
@@ -172,13 +172,13 @@ void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev)
     //
     // Execute Gemm on CPU and GPU device
     //
-    // add oneapi::mkl::blas::gemm to execution queue
-    cpu_gemm_done = oneapi::mkl::blas::column_major::gemm(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, transA, transB, m,
-        n, k, alpha, cpu_A, ldA, cpu_B, ldB, beta, cpu_C, ldC);
-    gpu_gemm_done = oneapi::mkl::blas::column_major::gemm(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ gpu_queue }, transA, transB, m,
-        n, k, alpha, gpu_A, ldA, gpu_B, ldB, beta, gpu_C, ldC);
+    // add oneapi::math::blas::gemm to execution queue
+    cpu_gemm_done = oneapi::math::blas::column_major::gemm(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, transA, transB,
+        m, n, k, alpha, cpu_A, ldA, cpu_B, ldB, beta, cpu_C, ldC);
+    gpu_gemm_done = oneapi::math::blas::column_major::gemm(
+        oneapi::math::backend_selector<oneapi::math::backend::cublas>{ gpu_queue }, transA, transB,
+        m, n, k, alpha, gpu_A, ldA, gpu_B, ldB, beta, gpu_C, ldC);
 
     // Wait until calculations are done
     cpu_gemm_done.wait_and_throw();
@@ -196,13 +196,13 @@ void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev)
     // print results
     std::cout << "\n\t\tGEMM parameters:" << std::endl;
     std::cout << "\t\t\ttransA = "
-              << (transA == oneapi::mkl::transpose::nontrans
+              << (transA == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transA == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << ", transB = "
-              << (transB == oneapi::mkl::transpose::nontrans
+              << (transB == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transB == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transB == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tm = " << m << ", n = " << n << ", k = " << k << std::endl;
     std::cout << "\t\t\tlda = " << ldA << ", ldB = " << ldB << ", ldC = " << ldC << std::endl;
diff --git a/examples/blas/run_time_dispatching/level3/CMakeLists.txt b/examples/blas/run_time_dispatching/level3/CMakeLists.txt
index d0d35fc0d..1b3f992c4 100644
--- a/examples/blas/run_time_dispatching/level3/CMakeLists.txt
+++ b/examples/blas/run_time_dispatching/level3/CMakeLists.txt
@@ -64,15 +64,15 @@ foreach(blas_rt_source ${BLAS_RT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  add_dependencies(example_${domain}_${blas_rt_source} onemkl)
+  add_dependencies(example_${domain}_${blas_rt_source} onemath)
 
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET example_${domain}_${blas_rt_source} SOURCES ${BLAS_RT_SOURCES})
   endif()
 
   target_link_libraries(example_${domain}_${blas_rt_source} PUBLIC
-      onemkl
-      ONEMKL::SYCL::SYCL
+      onemath
+      ONEMATH::SYCL::SYCL
       ${CMAKE_DL_LIBS}
   )
 
diff --git a/examples/blas/run_time_dispatching/level3/gemm_usm.cpp b/examples/blas/run_time_dispatching/level3/gemm_usm.cpp
index cd59e7b7f..1cfef28fb 100644
--- a/examples/blas/run_time_dispatching/level3/gemm_usm.cpp
+++ b/examples/blas/run_time_dispatching/level3/gemm_usm.cpp
@@ -20,14 +20,14 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPCPP API oneapi::mkl::blas::gemm
+*       This example demonstrates use of DPCPP API oneapi::math::blas::gemm
 *       using unified shared memory to perform General Matrix-Matrix
 *       Multiplication on a SYCL device (HOST, CPU, GPU) that is selected
 *       during runtime.
 *
 *       C = alpha * op(A) * op(B) + beta * C
 *
-*       where op() is defined by one of oneapi::mkl::transpose::{nontrans,trans,conjtrans}
+*       where op() is defined by one of oneapi::math::transpose::{nontrans,trans,conjtrans}
 *
 *
 *       This example demonstrates only single precision (float) data type for
@@ -47,7 +47,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "example_helper.hpp"
 
@@ -67,8 +67,8 @@ void run_gemm_example(const sycl::device& dev) {
     // C = alpha * op(A) * op(B)  + beta * C
     //
 
-    oneapi::mkl::transpose transA = oneapi::mkl::transpose::trans;
-    oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
+    oneapi::math::transpose transA = oneapi::math::transpose::trans;
+    oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
 
     // matrix data sizes
     int m = 45;
@@ -79,8 +79,8 @@ void run_gemm_example(const sycl::device& dev) {
     int ldA = 103;
     int ldB = 105;
     int ldC = 106;
-    int sizea = (transA == oneapi::mkl::transpose::nontrans) ? ldA * k : ldA * m;
-    int sizeb = (transB == oneapi::mkl::transpose::nontrans) ? ldB * n : ldB * k;
+    int sizea = (transA == oneapi::math::transpose::nontrans) ? ldA * k : ldA * m;
+    int sizeb = (transB == oneapi::math::transpose::nontrans) ? ldB * n : ldB * k;
     int sizec = ldC * n;
 
     // set scalar fp values
@@ -116,7 +116,7 @@ void run_gemm_example(const sycl::device& dev) {
 
     rand_matrix(A, transA, m, k, ldA);
     rand_matrix(B, transB, k, n, ldB);
-    rand_matrix(C, oneapi::mkl::transpose::nontrans, m, n, ldC);
+    rand_matrix(C, oneapi::math::transpose::nontrans, m, n, ldC);
 
     // allocate memory on device
     auto dev_A = sycl::malloc_device<float>(sizea * sizeof(float), main_queue);
@@ -134,9 +134,9 @@ void run_gemm_example(const sycl::device& dev) {
     //
     // Execute Gemm
     //
-    // add oneapi::mkl::blas::gemm to execution queue
-    gemm_done = oneapi::mkl::blas::column_major::gemm(main_queue, transA, transB, m, n, k, alpha,
-                                                      dev_A, ldA, dev_B, ldB, beta, dev_C, ldC);
+    // add oneapi::math::blas::gemm to execution queue
+    gemm_done = oneapi::math::blas::column_major::gemm(main_queue, transA, transB, m, n, k, alpha,
+                                                       dev_A, ldA, dev_B, ldB, beta, dev_C, ldC);
 
     // Wait until calculations are done
     main_queue.wait_and_throw();
@@ -149,13 +149,13 @@ void run_gemm_example(const sycl::device& dev) {
 
     std::cout << "\n\t\tGEMM parameters:" << std::endl;
     std::cout << "\t\t\ttransA = "
-              << (transA == oneapi::mkl::transpose::nontrans
+              << (transA == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transA == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << ", transB = "
-              << (transB == oneapi::mkl::transpose::nontrans
+              << (transB == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transB == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transB == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tm = " << m << ", n = " << n << ", k = " << k << std::endl;
     std::cout << "\t\t\tlda = " << ldA << ", ldB = " << ldB << ", ldC = " << ldC << std::endl;
diff --git a/examples/dft/compile_time_dispatching/CMakeLists.txt b/examples/dft/compile_time_dispatching/CMakeLists.txt
index ed0ca2922..8c7665cb2 100644
--- a/examples/dft/compile_time_dispatching/CMakeLists.txt
+++ b/examples/dft/compile_time_dispatching/CMakeLists.txt
@@ -34,13 +34,13 @@ foreach(dft_ct_source ${DFT_CT_SOURCES})
   )
 
   if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND)
-    add_dependencies(${EXAMPLE_NAME} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
+    add_dependencies(${EXAMPLE_NAME} onemath_${domain}_mklcpu onemath_${domain}_cufft)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_mklcpu onemath_${domain}_cufft)
   endif()
 
   target_link_libraries(${EXAMPLE_NAME} PUBLIC
-    ${ONEMKL_LIBRARIES_${domain}}
-    onemkl_warnings
+    ${ONEMATH_LIBRARIES_${domain}}
+    onemath_warnings
   )
 
   # Register example as ctest
diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp
index 59c810f3f..864e073eb 100644
--- a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp
+++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp
@@ -20,13 +20,13 @@
 // STL includes
 #include <iostream>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include <complex>
 
 void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) {
@@ -82,28 +82,28 @@ void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device)
 
     // enabling
     // 1. create descriptors
-    oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE,
-                                 oneapi::mkl::dft::domain::COMPLEX>
+    oneapi::math::dft::descriptor<oneapi::math::dft::precision::SINGLE,
+                                  oneapi::math::dft::domain::COMPLEX>
         desc(static_cast<std::int64_t>(N));
 
     // 2. variadic set_value
-    desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                   oneapi::mkl::dft::config_value::NOT_INPLACE);
-    desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
+    desc.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                   oneapi::math::dft::config_value::NOT_INPLACE);
+    desc.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS,
                    static_cast<std::int64_t>(1));
 
     // 3a. commit_descriptor (compile_time MKLCPU)
-    desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue });
+    desc.commit(oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue });
 
     // 4a. compute_forward / compute_backward (MKLCPU)
-    oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
+    oneapi::math::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
         desc, cpu_input_data, cpu_output_data);
 
     // 3b. commit_descriptor (compile_time cuFFT)
-    desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::cufft>{ gpu_queue });
+    desc.commit(oneapi::math::backend_selector<oneapi::math::backend::cufft>{ gpu_queue });
 
     // 4b. compute_forward / compute_backward (cuFFT)
-    oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
+    oneapi::math::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
         desc, gpu_input_data, gpu_output_data);
 
     cpu_queue.wait_and_throw();
diff --git a/examples/dft/run_time_dispatching/CMakeLists.txt b/examples/dft/run_time_dispatching/CMakeLists.txt
index e221c7950..5d9f201cc 100644
--- a/examples/dft/run_time_dispatching/CMakeLists.txt
+++ b/examples/dft/run_time_dispatching/CMakeLists.txt
@@ -57,17 +57,17 @@ foreach(dft_rt_sources ${DFT_RT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  add_dependencies(example_${domain}_${dft_rt_sources} onemkl)
+  add_dependencies(example_${domain}_${dft_rt_sources} onemath)
 
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET example_${domain}_${dft_rt_sources} SOURCES ${DFT_RT_SOURCES})
   endif()
 
   target_link_libraries(example_${domain}_${dft_rt_sources}
-      PUBLIC onemkl
-      PUBLIC ONEMKL::SYCL::SYCL
+      PUBLIC onemath
+      PUBLIC ONEMATH::SYCL::SYCL
       PUBLIC ${CMAKE_DL_LIBS}
-      PRIVATE onemkl_warnings
+      PRIVATE onemath_warnings
   )
 
   # Register example as ctest
diff --git a/examples/dft/run_time_dispatching/real_fwd_usm.cpp b/examples/dft/run_time_dispatching/real_fwd_usm.cpp
index c220b0ee7..f674a5c42 100644
--- a/examples/dft/run_time_dispatching/real_fwd_usm.cpp
+++ b/examples/dft/run_time_dispatching/real_fwd_usm.cpp
@@ -21,14 +21,14 @@
 #include <iostream>
 #include <cstdint>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 void run_example(const sycl::device& dev) {
     constexpr std::size_t N = 16;
@@ -53,21 +53,21 @@ void run_example(const sycl::device& dev) {
     auto x_usm = sycl::malloc_shared<float>(N * 2, sycl_queue);
 
     // 1. create descriptors
-    oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE,
-                                 oneapi::mkl::dft::domain::REAL>
+    oneapi::math::dft::descriptor<oneapi::math::dft::precision::SINGLE,
+                                  oneapi::math::dft::domain::REAL>
         desc(static_cast<std::int64_t>(N));
 
     // 2. variadic set_value
-    desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
+    desc.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS,
                    static_cast<std::int64_t>(1));
-    desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                   oneapi::mkl::dft::config_value::INPLACE);
+    desc.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                   oneapi::math::dft::config_value::INPLACE);
 
     // 3. commit_descriptor (runtime dispatch)
     desc.commit(sycl_queue);
 
     // 4. compute_forward / compute_backward (runtime dispatch)
-    auto compute_event = oneapi::mkl::dft::compute_forward(desc, x_usm);
+    auto compute_event = oneapi::math::dft::compute_forward(desc, x_usm);
 
     // Do something with transformed data.
     compute_event.wait();
@@ -122,7 +122,7 @@ int main(int /*argc*/, char** /*argv*/) {
         run_example(my_dev);
         std::cout << "DFT example ran OK" << std::endl;
     }
-    catch (oneapi::mkl::unimplemented const& e) {
+    catch (oneapi::math::unimplemented const& e) {
         std::cerr << "Unsupported Configuration:" << std::endl;
         std::cerr << "\t" << e.what() << std::endl;
         return 0;
diff --git a/examples/include/example_helper.hpp b/examples/include/example_helper.hpp
index c5da54acf..681ab9142 100644
--- a/examples/include/example_helper.hpp
+++ b/examples/include/example_helper.hpp
@@ -88,10 +88,10 @@ fp rand_scalar() {
 }
 
 template <typename vec>
-void rand_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld) {
+void rand_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld) {
     using fp = typename vec::value_type;
 
-    if (trans == oneapi::mkl::transpose::nontrans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 M.at(i + j * ld) = rand_scalar<fp>();
diff --git a/examples/lapack/compile_time_dispatching/CMakeLists.txt b/examples/lapack/compile_time_dispatching/CMakeLists.txt
index cc126674f..f72ea3c7b 100644
--- a/examples/lapack/compile_time_dispatching/CMakeLists.txt
+++ b/examples/lapack/compile_time_dispatching/CMakeLists.txt
@@ -36,13 +36,13 @@ foreach(lapack_ct_source ${LAPACK_CT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
   if(domain STREQUAL "lapack" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUSOLVER_BACKEND)
-    add_dependencies(example_${domain}_${lapack_ct_source} onemkl_${domain}_mklcpu onemkl_${domain}_cusolver)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cusolver)
+    add_dependencies(example_${domain}_${lapack_ct_source} onemath_${domain}_mklcpu onemath_${domain}_cusolver)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_mklcpu onemath_${domain}_cusolver)
     target_link_libraries(example_${domain}_${lapack_ct_source} PUBLIC ${OPENCL_LIBRARY})
   endif()
   target_link_libraries(example_${domain}_${lapack_ct_source} PUBLIC
-      ${ONEMKL_LIBRARIES_${domain}}
-      ONEMKL::SYCL::SYCL
+      ${ONEMATH_LIBRARIES_${domain}}
+      ONEMATH::SYCL::SYCL
   )
   # Register example as ctest
  add_test(NAME ${domain}/EXAMPLE/CT/${lapack_ct_source} COMMAND example_${domain}_${lapack_ct_source})
diff --git a/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp b/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp
index 2d6017d08..658006938 100644
--- a/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp
+++ b/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp
@@ -20,8 +20,8 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of oneapi::mkl::lapack::getrf and
-*       oneapi::mkl::lapack::getrs to perform LU factorization and compute
+*       This example demonstrates use of oneapi::math::lapack::getrf and
+*       oneapi::math::lapack::getrs to perform LU factorization and compute
 *       the solution on both an Intel CPU device and NVIDIA GPU device.
 *
 *       This example demonstrates only single precision (float) data type
@@ -35,13 +35,13 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 // local includes
 #include "example_helper.hpp"
@@ -67,7 +67,7 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     std::int64_t A_size = n * lda;
     std::int64_t B_size = nrhs * ldb;
     std::int64_t ipiv_size = n;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     // Catch asynchronous exceptions for CPU and GPU
     auto cpu_error_handler = [&](sycl::exception_list exceptions) {
@@ -75,7 +75,7 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
             try {
                 std::rethrow_exception(e);
             }
-            catch (oneapi::mkl::lapack::exception const& e) {
+            catch (oneapi::math::lapack::exception const& e) {
                 // Handle LAPACK related exceptions that happened during asynchronous call
                 std::cerr
                     << "Caught asynchronous LAPACK exception on CPU device during GETRF or GETRS:"
@@ -98,7 +98,7 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
             try {
                 std::rethrow_exception(e);
             }
-            catch (oneapi::mkl::lapack::exception const& e) {
+            catch (oneapi::math::lapack::exception const& e) {
                 // Handle LAPACK related exceptions that happened during asynchronous call
                 std::cerr
                     << "Caught asynchronous LAPACK exception on GPU device during GETRF or GETRS:"
@@ -145,10 +145,10 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     std::int64_t* cpu_ipiv =
         sycl::malloc_device<std::int64_t>(ipiv_size * sizeof(std::int64_t), cpu_queue);
 
-    std::int64_t cpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, lda);
-    std::int64_t cpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
+    std::int64_t cpu_getrf_scratchpad_size = oneapi::math::lapack::getrf_scratchpad_size<float>(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, m, n, lda);
+    std::int64_t cpu_getrs_scratchpad_size = oneapi::math::lapack::getrs_scratchpad_size<float>(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
         lda, ldb);
     float* cpu_getrf_scratchpad = sycl::malloc_device<float>(
         cpu_getrf_scratchpad_size * sizeof(float), cpu_device, cpu_context);
@@ -175,11 +175,11 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     std::int64_t* gpu_ipiv =
         sycl::malloc_device<std::int64_t>(ipiv_size * sizeof(std::int64_t), gpu_queue);
 
-    std::int64_t gpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, lda);
-    std::int64_t gpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
-        lda, ldb);
+    std::int64_t gpu_getrf_scratchpad_size = oneapi::math::lapack::getrf_scratchpad_size<float>(
+        oneapi::math::backend_selector<oneapi::math::backend::cusolver>{ gpu_queue }, m, n, lda);
+    std::int64_t gpu_getrs_scratchpad_size = oneapi::math::lapack::getrs_scratchpad_size<float>(
+        oneapi::math::backend_selector<oneapi::math::backend::cusolver>{ gpu_queue }, trans, n,
+        nrhs, lda, ldb);
     float* gpu_getrf_scratchpad = sycl::malloc_device<float>(
         gpu_getrf_scratchpad_size * sizeof(float), gpu_device, gpu_context);
     float* gpu_getrs_scratchpad = sycl::malloc_device<float>(
@@ -196,19 +196,19 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     // Execute on CPU and GPU devices
     //
 
-    cpu_getrf_done = oneapi::mkl::lapack::getrf(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, cpu_A, lda,
-        cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size);
-    cpu_getrs_done = oneapi::mkl::lapack::getrs(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
+    cpu_getrf_done = oneapi::math::lapack::getrf(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, m, n, cpu_A,
+        lda, cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size);
+    cpu_getrs_done = oneapi::math::lapack::getrs(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
         cpu_A, lda, cpu_ipiv, cpu_B, ldb, cpu_getrs_scratchpad, cpu_getrs_scratchpad_size,
         { cpu_getrf_done });
-    gpu_getrf_done = oneapi::mkl::lapack::getrf(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, gpu_A,
+    gpu_getrf_done = oneapi::math::lapack::getrf(
+        oneapi::math::backend_selector<oneapi::math::backend::cusolver>{ gpu_queue }, m, n, gpu_A,
         lda, gpu_ipiv, gpu_getrf_scratchpad, gpu_getrf_scratchpad_size);
-    gpu_getrs_done = oneapi::mkl::lapack::getrs(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
-        gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size,
+    gpu_getrs_done = oneapi::math::lapack::getrs(
+        oneapi::math::backend_selector<oneapi::math::backend::cusolver>{ gpu_queue }, trans, n,
+        nrhs, gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size,
         { gpu_getrf_done });
 
     // Wait until calculations are done
@@ -227,9 +227,9 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     // Print results
     std::cout << "\n\t\tGETRF and GETRS parameters:" << std::endl;
     std::cout << "\t\t\ttrans = "
-              << (trans == oneapi::mkl::transpose::nontrans
+              << (trans == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (trans == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (trans == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tm = " << m << ", n = " << n << ", nrhs = " << nrhs << std::endl;
     std::cout << "\t\t\tlda = " << lda << ", ldb = " << ldb << std::endl;
@@ -308,7 +308,7 @@ int main(int argc, char** argv) {
         run_getrs_example(cpu_dev, gpu_dev);
         std::cout << "LAPACK GETRS USM example ran OK on MKLCPU and CUSOLVER" << std::endl;
     }
-    catch (oneapi::mkl::lapack::exception const& e) {
+    catch (oneapi::math::lapack::exception const& e) {
         // Handle LAPACK related exceptions that happened during synchronous call
         std::cerr << "Caught synchronous LAPACK exception:" << std::endl;
         std::cerr << "\t" << e.what() << std::endl;
diff --git a/examples/lapack/run_time_dispatching/CMakeLists.txt b/examples/lapack/run_time_dispatching/CMakeLists.txt
index 5fcf6a311..5026a6b49 100644
--- a/examples/lapack/run_time_dispatching/CMakeLists.txt
+++ b/examples/lapack/run_time_dispatching/CMakeLists.txt
@@ -49,15 +49,15 @@ foreach(lapack_rt_source ${LAPACK_RT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  add_dependencies(example_${domain}_${lapack_rt_source} onemkl)
+  add_dependencies(example_${domain}_${lapack_rt_source} onemath)
 
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET example_${domain}_${lapack_rt_source} SOURCES ${LAPACK_RT_SOURCES})
   endif()
 
   target_link_libraries(example_${domain}_${lapack_rt_source} PUBLIC
-      onemkl
-      ONEMKL::SYCL::SYCL
+      onemath
+      ONEMATH::SYCL::SYCL
       ${CMAKE_DL_LIBS}
   )
 
diff --git a/examples/lapack/run_time_dispatching/getrs_usm.cpp b/examples/lapack/run_time_dispatching/getrs_usm.cpp
index 4cf851a7e..c31d96d6e 100644
--- a/examples/lapack/run_time_dispatching/getrs_usm.cpp
+++ b/examples/lapack/run_time_dispatching/getrs_usm.cpp
@@ -20,8 +20,8 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of oneapi::mkl::lapack::getrf and
-*       oneapi::mkl::lapack::getrs to perform LU factorization and compute
+*       This example demonstrates use of oneapi::math::lapack::getrf and
+*       oneapi::math::lapack::getrs to perform LU factorization and compute
 *       the solution on a SYCL device (HOST, CPU, GPU) that is selected
 *       during runtime.
 *
@@ -36,13 +36,13 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 // local includes
 #include "example_helper.hpp"
@@ -68,7 +68,7 @@ void run_getrs_example(const sycl::device& device) {
     std::int64_t A_size = n * lda;
     std::int64_t B_size = nrhs * ldb;
     std::int64_t ipiv_size = n;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     // Asynchronous error handler
     auto error_handler = [&](sycl::exception_list exceptions) {
@@ -76,7 +76,7 @@ void run_getrs_example(const sycl::device& device) {
             try {
                 std::rethrow_exception(e);
             }
-            catch (oneapi::mkl::lapack::exception const& e) {
+            catch (oneapi::math::lapack::exception const& e) {
                 // Handle LAPACK related exceptions that happened during asynchronous call
                 std::cerr << "Caught asynchronous LAPACK exception during GETRF or GETRS:"
                           << std::endl;
@@ -114,9 +114,9 @@ void run_getrs_example(const sycl::device& device) {
         sycl::malloc_device<std::int64_t>(ipiv_size * sizeof(std::int64_t), queue);
 
     std::int64_t getrf_scratchpad_size =
-        oneapi::mkl::lapack::getrf_scratchpad_size<float>(queue, m, n, lda);
+        oneapi::math::lapack::getrf_scratchpad_size<float>(queue, m, n, lda);
     std::int64_t getrs_scratchpad_size =
-        oneapi::mkl::lapack::getrs_scratchpad_size<float>(queue, trans, n, nrhs, lda, ldb);
+        oneapi::math::lapack::getrs_scratchpad_size<float>(queue, trans, n, nrhs, lda, ldb);
     float* getrf_scratchpad =
         sycl::malloc_shared<float>(getrf_scratchpad_size * sizeof(float), device, context);
     float* getrs_scratchpad =
@@ -145,11 +145,11 @@ void run_getrs_example(const sycl::device& device) {
     queue.memcpy(dev_B, B.data(), B_size * sizeof(float)).wait();
 
     // Execute on device
-    getrf_done = oneapi::mkl::lapack::getrf(queue, m, n, dev_A, lda, dev_ipiv, getrf_scratchpad,
-                                            getrf_scratchpad_size);
+    getrf_done = oneapi::math::lapack::getrf(queue, m, n, dev_A, lda, dev_ipiv, getrf_scratchpad,
+                                             getrf_scratchpad_size);
     getrs_done =
-        oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, dev_A, lda, dev_ipiv, dev_B, ldb,
-                                   getrs_scratchpad, getrs_scratchpad_size, { getrf_done });
+        oneapi::math::lapack::getrs(queue, trans, n, nrhs, dev_A, lda, dev_ipiv, dev_B, ldb,
+                                    getrs_scratchpad, getrs_scratchpad_size, { getrf_done });
 
     // Wait until calculations are done
     queue.wait_and_throw();
@@ -160,9 +160,9 @@ void run_getrs_example(const sycl::device& device) {
     // Print results
     std::cout << "\n\t\tGETRF and GETRS parameters:" << std::endl;
     std::cout << "\t\t\ttrans = "
-              << (trans == oneapi::mkl::transpose::nontrans
+              << (trans == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (trans == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (trans == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tm = " << m << ", n = " << n << ", nrhs = " << nrhs << std::endl;
     std::cout << "\t\t\tlda = " << lda << ", ldb = " << ldb << std::endl;
@@ -235,7 +235,7 @@ int main(int argc, char** argv) {
         run_getrs_example(dev);
         std::cout << "LAPACK GETRS USM example ran OK" << std::endl;
     }
-    catch (oneapi::mkl::lapack::exception const& e) {
+    catch (oneapi::math::lapack::exception const& e) {
         // Handle LAPACK related exceptions that happened during synchronous call
         std::cerr << "Caught synchronous LAPACK exception:" << std::endl;
         std::cerr << "\t" << e.what() << std::endl;
diff --git a/examples/rng/compile_time_dispatching/CMakeLists.txt b/examples/rng/compile_time_dispatching/CMakeLists.txt
index 4f57db38c..bfd37eead 100644
--- a/examples/rng/compile_time_dispatching/CMakeLists.txt
+++ b/examples/rng/compile_time_dispatching/CMakeLists.txt
@@ -33,16 +33,16 @@ foreach(rng_ct_source ${RNG_CT_SOURCES})
 
   if(domain STREQUAL "rng" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CURAND_BACKEND)
     add_dependencies(example_${domain}_${rng_ct_source}
-        onemkl_${domain}_mklcpu
-        onemkl_${domain}_curand)
-    list(APPEND ONEMKL_LIBRARIES_${domain}
-        onemkl_${domain}_mklcpu
-        onemkl_${domain}_curand)
+        onemath_${domain}_mklcpu
+        onemath_${domain}_curand)
+    list(APPEND ONEMATH_LIBRARIES_${domain}
+        onemath_${domain}_mklcpu
+        onemath_${domain}_curand)
   endif()
 
   target_link_libraries(example_${domain}_${rng_ct_source} PUBLIC
-      ${ONEMKL_LIBRARIES_${domain}}
-      ONEMKL::SYCL::SYCL
+      ${ONEMATH_LIBRARIES_${domain}}
+      ONEMATH::SYCL::SYCL
   )
 
   # Register example as ctest
diff --git a/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp b/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp
index cdfd6c765..2d15397dd 100644
--- a/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp
+++ b/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp
@@ -20,8 +20,8 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPC++ API oneapi::mkl::rng::uniform distribution
-*       with oneapi::mkl::rng::philox4x32x10 random number generator to produce
+*       This example demonstrates use of DPC++ API oneapi::math::rng::uniform distribution
+*       with oneapi::math::rng::philox4x32x10 random number generator to produce
 *       random numbers on a INTEL CPU SYCL device and an NVIDIA GPU SYCL device
 *       with Unified Shared Memory(USM) API.
 *
@@ -36,13 +36,13 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 // local includes
 #include "example_helper.hpp"
@@ -98,12 +98,12 @@ void run_uniform_example(const sycl::device& cpu_dev, const sycl::device& gpu_de
     // preparation on CPU device and GPU device
     sycl::queue cpu_queue(cpu_dev, cpu_exception_handler);
     sycl::queue gpu_queue(gpu_dev, gpu_exception_handler);
-    oneapi::mkl::rng::default_engine cpu_engine(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, seed);
-    oneapi::mkl::rng::default_engine gpu_engine(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::curand>{ gpu_queue }, seed);
+    oneapi::math::rng::default_engine cpu_engine(
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ cpu_queue }, seed);
+    oneapi::math::rng::default_engine gpu_engine(
+        oneapi::math::backend_selector<oneapi::math::backend::curand>{ gpu_queue }, seed);
 
-    oneapi::mkl::rng::uniform<float> distribution(a, b);
+    oneapi::math::rng::uniform<float> distribution(a, b);
 
     //
     // Data preparation on host: prepare array for random numbers
@@ -127,8 +127,8 @@ void run_uniform_example(const sycl::device& cpu_dev, const sycl::device& gpu_de
     //
     sycl::event event_out_cpu;
     sycl::event event_out_gpu;
-    event_out_cpu = oneapi::mkl::rng::generate(distribution, cpu_engine, n, dev_cpu);
-    event_out_gpu = oneapi::mkl::rng::generate(distribution, gpu_engine, n, dev_gpu);
+    event_out_cpu = oneapi::math::rng::generate(distribution, cpu_engine, n, dev_cpu);
+    event_out_gpu = oneapi::math::rng::generate(distribution, gpu_engine, n, dev_gpu);
     event_out_cpu.wait_and_throw();
     event_out_gpu.wait_and_throw();
 
diff --git a/examples/rng/device/CMakeLists.txt b/examples/rng/device/CMakeLists.txt
index 1b6ecf2dd..b7257d627 100644
--- a/examples/rng/device/CMakeLists.txt
+++ b/examples/rng/device/CMakeLists.txt
@@ -56,10 +56,10 @@ foreach(rng_device_source ${RNG_DEVICE_SOURCES})
   endif()
 
   target_link_libraries(example_${domain}_${rng_device_source} PUBLIC
-      ONEMKL::SYCL::SYCL
+      ONEMATH::SYCL::SYCL
   )
 
-  if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+  if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
     target_link_options(example_${domain}_${rng_device_source} PUBLIC -fsycl -fsycl-device-code-split=per_kernel)
   endif()
 
diff --git a/examples/rng/device/uniform.cpp b/examples/rng/device/uniform.cpp
index a1c097bba..d9ed83dad 100644
--- a/examples/rng/device/uniform.cpp
+++ b/examples/rng/device/uniform.cpp
@@ -20,7 +20,7 @@
 /*
 *
 *  Content:
-*       This example demonstrates usage of oneapi::mkl::rng::device::mcg59
+*       This example demonstrates usage of oneapi::math::rng::device::mcg59
 *       random number generator to produce random
 *       numbers using unifrom distribution on a SYCL device (CPU, GPU).
 *
@@ -30,14 +30,14 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/rng/device.hpp"
+#include "oneapi/math/rng/device.hpp"
 
 #include "rng_example_helper.hpp"
 
@@ -74,10 +74,10 @@ int run_example(sycl::queue& queue) {
                 sycl::accessor r_acc(r_buf, cgh, sycl::write_only);
                 cgh.parallel_for(sycl::range<1>(n / VecSize), [=](sycl::item<1> item) {
                     size_t item_id = item.get_id(0);
-                    oneapi::mkl::rng::device::mcg59<VecSize> engine(seed, item_id * VecSize);
-                    oneapi::mkl::rng::device::uniform<Type> distr;
+                    oneapi::math::rng::device::mcg59<VecSize> engine(seed, item_id * VecSize);
+                    oneapi::math::rng::device::uniform<Type> distr;
 
-                    auto res = oneapi::mkl::rng::device::generate(distr, engine);
+                    auto res = oneapi::math::rng::device::generate(distr, engine);
                     if constexpr (VecSize == 1) {
                         r_acc[item_id] = res;
                     }
@@ -104,13 +104,13 @@ int run_example(sycl::queue& queue) {
     } // buffer life-time ends
 
     // compare results with host-side generation
-    oneapi::mkl::rng::device::mcg59<1> engine(seed);
-    oneapi::mkl::rng::device::uniform<Type> distr;
+    oneapi::math::rng::device::mcg59<1> engine(seed);
+    oneapi::math::rng::device::uniform<Type> distr;
 
     int err = 0;
     Type res_host;
     for (int i = 0; i < n; i++) {
-        res_host = oneapi::mkl::rng::device::generate(distr, engine);
+        res_host = oneapi::math::rng::device::generate(distr, engine);
         if (res_host != r_dev[i]) {
             std::cout << "error in " << i << " element " << res_host << " " << r_dev[i]
                       << std::endl;
diff --git a/examples/rng/run_time_dispatching/CMakeLists.txt b/examples/rng/run_time_dispatching/CMakeLists.txt
index d3bcc0f19..8de795c02 100644
--- a/examples/rng/run_time_dispatching/CMakeLists.txt
+++ b/examples/rng/run_time_dispatching/CMakeLists.txt
@@ -50,15 +50,15 @@ foreach(rng_rt_source ${RNG_RT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  add_dependencies(example_${domain}_${rng_rt_source} onemkl)
+  add_dependencies(example_${domain}_${rng_rt_source} onemath)
 
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET example_${domain}_${rng_rt_source} SOURCES ${RNG_RT_SOURCES})
   endif()
 
   target_link_libraries(example_${domain}_${rng_rt_source} PUBLIC
-      onemkl
-      ONEMKL::SYCL::SYCL
+      onemath
+      ONEMATH::SYCL::SYCL
       ${CMAKE_DL_LIBS}
   )
 
diff --git a/examples/rng/run_time_dispatching/uniform_usm.cpp b/examples/rng/run_time_dispatching/uniform_usm.cpp
index 8ac7363c8..34a3d4c99 100644
--- a/examples/rng/run_time_dispatching/uniform_usm.cpp
+++ b/examples/rng/run_time_dispatching/uniform_usm.cpp
@@ -20,8 +20,8 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPC++ API oneapi::mkl::rng::uniform distribution
-*       with oneapi::mkl::rng::philox4x32x10 random number generator to produce
+*       This example demonstrates use of DPC++ API oneapi::math::rng::uniform distribution
+*       with oneapi::math::rng::philox4x32x10 random number generator to produce
 *       random numbers on a SYCL device (HOST, CPU, GPU) that is selected
 *       during runtime with Unified Shared Memory(USM) API.
 *
@@ -36,13 +36,13 @@
 #include <iostream>
 #include <vector>
 
-// oneMKL/SYCL includes
+// oneMath/SYCL includes
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 // local includes
 #include "example_helper.hpp"
@@ -83,8 +83,8 @@ void run_uniform_example(const sycl::device& dev) {
     float a(0.0);
     float b(10.0);
 
-    oneapi::mkl::rng::default_engine engine(queue, seed);
-    oneapi::mkl::rng::uniform<float> distribution(a, b);
+    oneapi::math::rng::default_engine engine(queue, seed);
+    oneapi::math::rng::uniform<float> distribution(a, b);
 
     //
     // Data preparation on host: prepare array for random numbers
@@ -101,7 +101,7 @@ void run_uniform_example(const sycl::device& dev) {
     // Perform generation on device
     //
     sycl::event event_out;
-    event_out = oneapi::mkl::rng::generate(distribution, engine, n, dev_r);
+    event_out = oneapi::math::rng::generate(distribution, engine, n, dev_r);
     event_out.wait_and_throw();
 
     //
diff --git a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
index a38f4ebd4..8c586056d 100644
--- a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
+++ b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
@@ -33,7 +33,7 @@ foreach(sparse_ct_source ${SPARSE_CT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  target_link_libraries(${sparse_ct_source} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_mklcpu onemkl_sparse_blas_cusparse)
+  target_link_libraries(${sparse_ct_source} PRIVATE ONEMATH::SYCL::SYCL onemath_sparse_blas_mklcpu onemath_sparse_blas_cusparse)
 
   # Register example as ctest
   add_test(NAME sparse_blas/EXAMPLE/CT/${sparse_ct_source} COMMAND ${sparse_ct_source})
diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu_cusparse.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu_cusparse.cpp
index 31ce1975c..2e0c68422 100644
--- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu_cusparse.cpp
+++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu_cusparse.cpp
@@ -20,7 +20,7 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
+*       This example demonstrates use of DPC++ API oneapi::math::sparse::spmv
 *       using unified shared memory to perform general sparse matrix-vector
 *       multiplication on a INTEL CPU SYCL device and an NVIDIA GPU SYCL device.
 *
@@ -28,7 +28,7 @@
 *
 *       where op() is defined by one of
 *
-*           oneapi::mkl::transpose::{nontrans,trans,conjtrans}
+*           oneapi::math::transpose::{nontrans,trans,conjtrans}
 *
 *
 *       This example demonstrates only single precision (float) data type for
@@ -46,7 +46,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "example_helper.hpp"
 
@@ -112,62 +112,63 @@ int run_sparse_matrix_vector_multiply_example(selectorType& selector) {
     // Execute Matrix Multiply
     //
 
-    oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
-    oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
-    oneapi::mkl::sparse::matrix_view A_view;
+    oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
+    oneapi::math::sparse::spmv_alg alg = oneapi::math::sparse::spmv_alg::default_alg;
+    oneapi::math::sparse::matrix_view A_view;
 
     std::cout << "\n\t\tsparse::spmv parameters:\n";
     std::cout << "\t\t\ttransA = "
-              << (transA == oneapi::mkl::transpose::nontrans
+              << (transA == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transA == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tsize = " << size << std::endl;
     std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;
 
     // Create and initialize handle for a Sparse Matrix in COO format sorted by rows
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::init_coo_matrix(selector, &A_handle, size, size, nnz,
-                                         oneapi::mkl::index_base::zero, ia, ja, a);
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::init_coo_matrix(selector, &A_handle, size, size, nnz,
+                                          oneapi::math::index_base::zero, ia, ja, a);
     // cuSPARSE backend requires that the property sorted_by_rows or sorted is set when using matrices in COO format.
     // Setting these properties is also the best practice to get best performance.
-    oneapi::mkl::sparse::set_matrix_property(selector, A_handle,
-                                             oneapi::mkl::sparse::matrix_property::sorted_by_rows);
+    oneapi::math::sparse::set_matrix_property(
+        selector, A_handle, oneapi::math::sparse::matrix_property::sorted_by_rows);
 
     // Create and initialize dense vector handles
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::init_dense_vector(selector, &x_handle, size, x);
-    oneapi::mkl::sparse::init_dense_vector(selector, &y_handle, size, y);
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::init_dense_vector(selector, &x_handle, size, x);
+    oneapi::math::sparse::init_dense_vector(selector, &y_handle, size, y);
 
     // Create operation descriptor
-    oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
-    oneapi::mkl::sparse::init_spmv_descr(selector, &descr);
+    oneapi::math::sparse::spmv_descr_t descr = nullptr;
+    oneapi::math::sparse::init_spmv_descr(selector, &descr);
 
     // Allocate external workspace
     std::size_t workspace_size = 0;
-    oneapi::mkl::sparse::spmv_buffer_size(selector, transA, &alpha, A_view, A_handle, x_handle,
-                                          &beta, y_handle, alg, descr, workspace_size);
+    oneapi::math::sparse::spmv_buffer_size(selector, transA, &alpha, A_view, A_handle, x_handle,
+                                           &beta, y_handle, alg, descr, workspace_size);
     void* workspace = sycl::malloc_device(workspace_size, queue);
 
     // Optimize spmv
     auto ev_opt =
-        oneapi::mkl::sparse::spmv_optimize(selector, transA, &alpha, A_view, A_handle, x_handle,
-                                           &beta, y_handle, alg, descr, workspace);
+        oneapi::math::sparse::spmv_optimize(selector, transA, &alpha, A_view, A_handle, x_handle,
+                                            &beta, y_handle, alg, descr, workspace);
 
     // Run spmv
-    auto ev_spmv = oneapi::mkl::sparse::spmv(selector, transA, &alpha, A_view, A_handle, x_handle,
-                                             &beta, y_handle, alg, descr, { ev_opt });
+    auto ev_spmv = oneapi::math::sparse::spmv(selector, transA, &alpha, A_view, A_handle, x_handle,
+                                              &beta, y_handle, alg, descr, { ev_opt });
 
     // Release handles and descriptor
     std::vector<sycl::event> release_events;
     release_events.push_back(
-        oneapi::mkl::sparse::release_dense_vector(selector, x_handle, { ev_spmv }));
+        oneapi::math::sparse::release_dense_vector(selector, x_handle, { ev_spmv }));
     release_events.push_back(
-        oneapi::mkl::sparse::release_dense_vector(selector, y_handle, { ev_spmv }));
+        oneapi::math::sparse::release_dense_vector(selector, y_handle, { ev_spmv }));
     release_events.push_back(
-        oneapi::mkl::sparse::release_sparse_matrix(selector, A_handle, { ev_spmv }));
-    release_events.push_back(oneapi::mkl::sparse::release_spmv_descr(selector, descr, { ev_spmv }));
+        oneapi::math::sparse::release_sparse_matrix(selector, A_handle, { ev_spmv }));
+    release_events.push_back(
+        oneapi::math::sparse::release_spmv_descr(selector, descr, { ev_spmv }));
     for (auto event : release_events) {
         event.wait_and_throw();
     }
@@ -259,8 +260,8 @@ int main(int /*argc*/, char** /*argv*/) {
             std::cerr << "FAILED: NVIDIA GPU device not found" << std::endl;
             return 1;
         }
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu> cpu_selector{ cpu_queue };
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusparse> gpu_selector{ gpu_queue };
+        oneapi::math::backend_selector<oneapi::math::backend::mklcpu> cpu_selector{ cpu_queue };
+        oneapi::math::backend_selector<oneapi::math::backend::cusparse> gpu_selector{ gpu_queue };
 
         std::cout << "Running Sparse BLAS SPMV USM example on:" << std::endl;
         std::cout << "\tCPU device: " << cpu_queue.get_device().get_info<sycl::info::device::name>()
diff --git a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt
index f09daf819..97bd407fb 100644
--- a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt
+++ b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt
@@ -47,17 +47,17 @@ foreach(sparse_blas_rt_sources ${SPARSE_BLAS_RT_SOURCES})
       PUBLIC ${CMAKE_BINARY_DIR}/bin
   )
 
-  add_dependencies(example_${sparse_blas_rt_sources} onemkl)
+  add_dependencies(example_${sparse_blas_rt_sources} onemath)
 
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET example_${sparse_blas_rt_sources} SOURCES ${SPARSE_BLAS_RT_SOURCES})
   endif()
 
   target_link_libraries(example_${sparse_blas_rt_sources}
-      PUBLIC onemkl
-      PUBLIC ONEMKL::SYCL::SYCL
+      PUBLIC onemath
+      PUBLIC ONEMATH::SYCL::SYCL
       PUBLIC ${CMAKE_DL_LIBS}
-      PRIVATE onemkl_warnings
+      PRIVATE onemath_warnings
   )
 
   # Register example as ctest
diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp
index 69be82745..a6ff30354 100644
--- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp
+++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp
@@ -20,7 +20,7 @@
 /*
 *
 *  Content:
-*       This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
+*       This example demonstrates use of DPC++ API oneapi::math::sparse::spmv
 *       using unified shared memory to perform general sparse matrix-vector
 *       multiplication on a SYCL device (HOST, CPU, GPU) that is selected
 *       during runtime.
@@ -29,7 +29,7 @@
 *
 *       where op() is defined by one of
 *
-*           oneapi::mkl::transpose::{nontrans,trans,conjtrans}
+*           oneapi::math::transpose::{nontrans,trans,conjtrans}
 *
 *
 *       This example demonstrates only single precision (float) data type for
@@ -47,7 +47,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "example_helper.hpp"
 
@@ -128,59 +128,59 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) {
     // Execute Matrix Multiply
     //
 
-    oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
-    oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
-    oneapi::mkl::sparse::matrix_view A_view;
+    oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
+    oneapi::math::sparse::spmv_alg alg = oneapi::math::sparse::spmv_alg::default_alg;
+    oneapi::math::sparse::matrix_view A_view;
 
     std::cout << "\n\t\tsparse::spmv parameters:\n";
     std::cout << "\t\t\ttransA = "
-              << (transA == oneapi::mkl::transpose::nontrans
+              << (transA == oneapi::math::transpose::nontrans
                       ? "nontrans"
-                      : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans"))
+                      : (transA == oneapi::math::transpose::trans ? "trans" : "conjtrans"))
               << std::endl;
     std::cout << "\t\t\tnrows = " << nrows << std::endl;
     std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;
 
     // Create and initialize handle for a Sparse Matrix in CSR format
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
-                                         oneapi::mkl::index_base::zero, ia, ja, a);
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
+                                          oneapi::math::index_base::zero, ia, ja, a);
 
     // Create and initialize dense vector handles
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, sizevec_i64, x);
-    oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, sizevec_i64, y);
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::init_dense_vector(main_queue, &x_handle, sizevec_i64, x);
+    oneapi::math::sparse::init_dense_vector(main_queue, &y_handle, sizevec_i64, y);
 
     // Create operation descriptor
-    oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
-    oneapi::mkl::sparse::init_spmv_descr(main_queue, &descr);
+    oneapi::math::sparse::spmv_descr_t descr = nullptr;
+    oneapi::math::sparse::init_spmv_descr(main_queue, &descr);
 
     // Allocate external workspace
     std::size_t workspace_size = 0;
-    oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle,
-                                          &beta, y_handle, alg, descr, workspace_size);
+    oneapi::math::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle,
+                                           &beta, y_handle, alg, descr, workspace_size);
     void* workspace = sycl::malloc_device(workspace_size, main_queue);
 
     // Optimize spmv
     auto ev_opt =
-        oneapi::mkl::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle,
-                                           &beta, y_handle, alg, descr, workspace);
+        oneapi::math::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle,
+                                            &beta, y_handle, alg, descr, workspace);
 
     // Run spmv
-    auto ev_spmv = oneapi::mkl::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle,
-                                             &beta, y_handle, alg, descr, { ev_opt });
+    auto ev_spmv = oneapi::math::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle,
+                                              x_handle, &beta, y_handle, alg, descr, { ev_opt });
 
     // Release handles and descriptor
     std::vector<sycl::event> release_events;
     release_events.push_back(
-        oneapi::mkl::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv }));
+        oneapi::math::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv }));
     release_events.push_back(
-        oneapi::mkl::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv }));
+        oneapi::math::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv }));
     release_events.push_back(
-        oneapi::mkl::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv }));
+        oneapi::math::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv }));
     release_events.push_back(
-        oneapi::mkl::sparse::release_spmv_descr(main_queue, descr, { ev_spmv }));
+        oneapi::math::sparse::release_spmv_descr(main_queue, descr, { ev_spmv }));
     for (auto event : release_events) {
         event.wait_and_throw();
     }
@@ -190,7 +190,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) {
     //
 
     fp* res = y;
-    const bool isConj = (transA == oneapi::mkl::transpose::conjtrans);
+    const bool isConj = (transA == oneapi::math::transpose::conjtrans);
     for (intType row = 0; row < nrows; row++) {
         z[row] *= beta;
     }
diff --git a/include/oneapi/math.hpp b/include/oneapi/math.hpp
new file mode 100644
index 000000000..229ae6c93
--- /dev/null
+++ b/include/oneapi/math.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_HPP_
+#define _ONEMATH_HPP_
+
+#include "oneapi/math/types.hpp"
+
+#include "oneapi/math/blas.hpp"
+#include "oneapi/math/dft.hpp"
+#include "oneapi/math/lapack.hpp"
+#include "oneapi/math/rng.hpp"
+#include "oneapi/math/sparse_blas.hpp"
+
+#endif //_ONEMATH_HPP_
diff --git a/include/oneapi/mkl/bfloat16.hpp b/include/oneapi/math/bfloat16.hpp
similarity index 99%
rename from include/oneapi/mkl/bfloat16.hpp
rename to include/oneapi/math/bfloat16.hpp
index 127d5ced4..21a84eab7 100644
--- a/include/oneapi/mkl/bfloat16.hpp
+++ b/include/oneapi/math/bfloat16.hpp
@@ -25,7 +25,7 @@
 #include <type_traits>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 namespace bfloat16_impl {
 
@@ -222,7 +222,7 @@ inline bfloat16::operator float() const {
     return bfloat16_impl::raw_to_float(raw << 16);
 }
 
-} /* namespace mkl */
+} /* namespace math */
 } // namespace oneapi
 
 #endif /* _BFLOAT16_HPP__ */
diff --git a/include/oneapi/math/blas.hpp b/include/oneapi/math/blas.hpp
new file mode 100644
index 000000000..a58f72fb4
--- /dev/null
+++ b/include/oneapi/math/blas.hpp
@@ -0,0 +1,73 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_BLAS_HPP_
+#define _ONEMATH_BLAS_HPP_
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
+#include <complex>
+#include <cstdint>
+
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math/types.hpp"
+
+#include "oneapi/math/detail/get_device_id.hpp"
+
+#include "oneapi/math/blas/detail/blas_loader.hpp"
+#ifdef ONEMATH_ENABLE_CUBLAS_BACKEND
+#include "oneapi/math/blas/detail/cublas/blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_ROCBLAS_BACKEND
+#include "oneapi/math/blas/detail/rocblas/blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
+#include "oneapi/math/blas/detail/mklcpu/blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
+#include "oneapi/math/blas/detail/mklgpu/blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_NETLIB_BACKEND
+#include "oneapi/math/blas/detail/netlib/blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND
+#include "oneapi/math/blas/detail/portblas/blas_ct.hpp"
+#endif
+
+namespace oneapi {
+namespace math {
+namespace blas {
+namespace column_major {
+
+#include "blas.hxx"
+
+} //namespace column_major
+namespace row_major {
+
+#include "blas.hxx"
+
+} //namespace row_major
+} //namespace blas
+} //namespace math
+} //namespace oneapi
+
+#endif //_ONEMATH_BLAS_HPP_
diff --git a/include/oneapi/mkl/blas.hxx b/include/oneapi/math/blas.hxx
similarity index 100%
rename from include/oneapi/mkl/blas.hxx
rename to include/oneapi/math/blas.hxx
diff --git a/include/oneapi/mkl/blas/detail/blas_ct_backends.hpp b/include/oneapi/math/blas/detail/blas_ct_backends.hpp
similarity index 94%
rename from include/oneapi/mkl/blas/detail/blas_ct_backends.hpp
rename to include/oneapi/math/blas/detail/blas_ct_backends.hpp
index eb894b5b9..ee2631fba 100644
--- a/include/oneapi/mkl/blas/detail/blas_ct_backends.hpp
+++ b/include/oneapi/math/blas/detail/blas_ct_backends.hpp
@@ -28,11 +28,11 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -79,7 +79,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_BLAS_CT_BACKENDS_HPP__
diff --git a/include/oneapi/mkl/blas/detail/blas_ct_backends.hxx b/include/oneapi/math/blas/detail/blas_ct_backends.hxx
similarity index 100%
rename from include/oneapi/mkl/blas/detail/blas_ct_backends.hxx
rename to include/oneapi/math/blas/detail/blas_ct_backends.hxx
diff --git a/include/oneapi/mkl/blas/detail/blas_loader.hpp b/include/oneapi/math/blas/detail/blas_loader.hpp
similarity index 82%
rename from include/oneapi/mkl/blas/detail/blas_loader.hpp
rename to include/oneapi/math/blas/detail/blas_loader.hpp
index 665f5dc80..611add52a 100644
--- a/include/oneapi/mkl/blas/detail/blas_loader.hpp
+++ b/include/oneapi/math/blas/detail/blas_loader.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_LOADER_HPP_
-#define _ONEMKL_BLAS_LOADER_HPP_
+#ifndef _ONEMATH_BLAS_LOADER_HPP_
+#define _ONEMATH_BLAS_LOADER_HPP_
 
 #include <complex>
 #include <cstdint>
@@ -28,13 +28,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 namespace detail {
@@ -51,7 +51,7 @@ namespace detail {
 } //namespace detail
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BLAS_LOADER_HPP_
+#endif //_ONEMATH_BLAS_LOADER_HPP_
diff --git a/include/oneapi/math/blas/detail/blas_loader.hxx b/include/oneapi/math/blas/detail/blas_loader.hxx
new file mode 100644
index 000000000..f37cc32a9
--- /dev/null
+++ b/include/oneapi/math/blas/detail/blas_loader.hxx
@@ -0,0 +1,2589 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+// Buffer APIs
+
+ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, double beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
+                         sycl::buffer<float, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                        sycl::buffer<float, 1>& a);
+ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                        sycl::buffer<double, 1>& a);
+
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, double beta, sycl::buffer<double, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, sycl::half beta,
+                               sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, float beta,
+                               sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                               std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, float beta,
+                               sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                               std::int64_t batch_size);
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                               float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, float beta,
+                               sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
+                         sycl::buffer<double, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
+                         std::int64_t ldc);
+ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, std::int64_t n, std::int64_t k,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                               sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
+                               float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                               sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
+                               double beta, sycl::buffer<double, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               transpose trans, std::int64_t n, std::int64_t k,
+                               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               transpose trans, std::int64_t n, std::int64_t k,
+                               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
+ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::int64_t k, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::int64_t k, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c,
+                        float s);
+ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c,
+                        double s);
+ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                        std::int64_t incy, float c, float s);
+ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                        std::int64_t incy, double c, double s);
+
+ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<float, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<double, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                          sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                          sycl::buffer<double, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                          std::int64_t incx, std::complex<float> beta,
+                          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                          std::int64_t incx, std::complex<double> beta,
+                          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
+ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
+                          std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
+                          std::int64_t ldc);
+ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
+                          std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
+                          std::int64_t ldc);
+ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                          std::int64_t ldb, std::complex<float> beta,
+                          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                          std::int64_t ldb, std::complex<double> beta,
+                          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                         std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
+                         double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                               std::int64_t m, std::int64_t n, float alpha,
+                               sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
+                               sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                               std::int64_t m, std::int64_t n, double alpha,
+                               sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
+                               sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                               std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                               sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                               std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                               sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               std::int64_t m, std::int64_t n, sycl::buffer<float, 1>& a,
+                               std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, sycl::buffer<float, 1>& c,
+                               std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               std::int64_t m, std::int64_t n, sycl::buffer<double, 1>& a,
+                               std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, sycl::buffer<double, 1>& c,
+                               std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               std::int64_t m, std::int64_t n,
+                               sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
+                               std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               std::int64_t m, std::int64_t n,
+                               sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
+                               std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
+                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a,
+                        std::int64_t lda);
+ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, double alpha, sycl::buffer<std::complex<double>, 1>& x,
+                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a,
+                        std::int64_t lda);
+
+ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
+                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a);
+ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, double alpha, sycl::buffer<std::complex<double>, 1>& x,
+                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a);
+
+ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                              transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
+                              std::int64_t k, float alpha, sycl::buffer<int8_t, 1>& a,
+                              std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
+                              std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
+                              std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
+ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                              transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
+                              std::int64_t k, float alpha, sycl::buffer<int8_t, 1>& a,
+                              std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
+                              std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
+                              std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
+ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                              transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
+                              std::int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a,
+                              std::int64_t lda, uint8_t ao, sycl::buffer<int8_t, 1>& b,
+                              std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
+                              std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
+ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                              transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
+                              std::int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a,
+                              std::int64_t lda, uint8_t ao, sycl::buffer<uint8_t, 1>& b,
+                              std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
+                              std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
+
+ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<float, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<double, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue& queue,
+                          sycl::buffer<float, 1>& d1, sycl::buffer<float, 1>& d2,
+                          sycl::buffer<float, 1>& x1, float y1, sycl::buffer<float, 1>& param);
+ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue& queue,
+                          sycl::buffer<double, 1>& d1, sycl::buffer<double, 1>& d2,
+                          sycl::buffer<double, 1>& x1, double y1, sycl::buffer<double, 1>& param);
+
+ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
+ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<double, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                          float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                          sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                          sycl::buffer<float, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                          double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                          sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
+                          sycl::buffer<double, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                          std::int64_t ldb, std::complex<float> beta,
+                          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                          std::int64_t ldb, std::complex<double> beta,
+                          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
+                         sycl::buffer<double, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
+                         sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, sycl::half beta,
+                         sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
+                         sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                         transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                         float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
+                         sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
+                         std::int64_t lda);
+ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
+                         sycl::buffer<double, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                        std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                        sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
+                        std::int64_t lda);
+ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                        std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                        sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a,
+                        std::int64_t lda);
+
+ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& result);
+ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& result);
+
+ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& a);
+ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& a);
+
+ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                         std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
+                         std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n, double alpha,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
+                         std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
+                         std::int64_t ldc);
+ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& result);
+ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& result);
+
+ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                        sycl::buffer<float, 1>& a, std::int64_t lda);
+ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                        std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                        sycl::buffer<double, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                         uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
+                         sycl::buffer<float, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<float, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<double, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
+                         std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
+                         double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<double, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a);
+ONEMATH_EXPORT void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
+                         sycl::buffer<double, 1>& a);
+
+ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                               std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                               std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                               std::int64_t n, std::complex<float> alpha,
+                               sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                               std::int64_t n, std::complex<double> alpha,
+                               sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+
+ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                         std::int64_t incy, sycl::buffer<float, 1>& param);
+ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                         std::int64_t incy, sycl::buffer<double, 1>& param);
+
+ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                        std::int64_t incy, sycl::buffer<float, 1>& result);
+ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                        std::int64_t incy, sycl::buffer<double, 1>& result);
+ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                        std::int64_t incy, sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                           float sb, sycl::buffer<float, 1>& x, std::int64_t incx,
+                           sycl::buffer<float, 1>& y, std::int64_t incy,
+                           sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                          std::int64_t ldb, float beta, sycl::buffer<std::complex<float>, 1>& c,
+                          std::int64_t ldc);
+ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                          transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                          std::int64_t ldb, double beta, sycl::buffer<std::complex<double>, 1>& c,
+                          std::int64_t ldc);
+
+ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
+                         sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c,
+                         sycl::buffer<float, 1>& s);
+ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue,
+                         sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& b,
+                         sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s);
+ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue,
+                         sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
+                         sycl::buffer<std::complex<float>, 1>& s);
+ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue,
+                         sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
+                         sycl::buffer<std::complex<double>, 1>& s);
+
+ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, float alpha,
+                                   sycl::buffer<float, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, double alpha,
+                                   sycl::buffer<double, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+
+ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, float alpha,
+                                   sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb,
+                                   std::int64_t stride, std::int64_t batch_size);
+ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, double alpha,
+                                   sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb,
+                                   std::int64_t stride, std::int64_t batch_size);
+ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                   sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
+                                   std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
+ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                   sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
+                                   std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                  transpose transb, std::int64_t m, std::int64_t n, float alpha,
+                                  sycl::buffer<float, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
+                                  std::int64_t ldb, std::int64_t stride_b,
+                                  sycl::buffer<float, 1>& c, std::int64_t ldc,
+                                  std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                  transpose transb, std::int64_t m, std::int64_t n, double alpha,
+                                  sycl::buffer<double, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
+                                  std::int64_t ldb, std::int64_t stride_b,
+                                  sycl::buffer<double, 1>& c, std::int64_t ldc,
+                                  std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                  transpose transb, std::int64_t m, std::int64_t n,
+                                  std::complex<float> alpha,
+                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, std::complex<float> beta,
+                                  sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                                  std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
+                                  std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                  transpose transb, std::int64_t m, std::int64_t n,
+                                  std::complex<double> alpha,
+                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, std::complex<double> beta,
+                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                                  std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
+                                  std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                             std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, double alpha,
+                             sycl::buffer<double, 1>& a, std::int64_t lda,
+                             sycl::buffer<double, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                             sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                             sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
+ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                             sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                             sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                              std::int64_t m, std::int64_t n, float alpha,
+                              sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
+                              sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t strideb);
+ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                              std::int64_t m, std::int64_t n, double alpha,
+                              sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
+                              sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t strideb);
+ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                              std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                              std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                              std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                              std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+
+ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, float alpha,
+                             sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb);
+ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, double alpha,
+                             sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb);
+ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                             sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
+                             std::int64_t ldb);
+ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                             std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                             sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
+                             std::int64_t ldb);
+
+ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                            transpose transb, std::int64_t m, std::int64_t n, float alpha,
+                            sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
+                            sycl::buffer<float, 1>& b, std::int64_t ldb, sycl::buffer<float, 1>& c,
+                            std::int64_t ldc);
+ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                            transpose transb, std::int64_t m, std::int64_t n, double alpha,
+                            sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
+                            sycl::buffer<double, 1>& b, std::int64_t ldb,
+                            sycl::buffer<double, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                            transpose transb, std::int64_t m, std::int64_t n,
+                            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                            std::int64_t lda, std::complex<float> beta,
+                            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                            transpose transb, std::int64_t m, std::int64_t n,
+                            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                            std::int64_t lda, std::complex<double> beta,
+                            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+// USM (Unified Shared Memory) APIs
+
+ONEMATH_EXPORT sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                                const std::complex<float>* a, std::int64_t lda, float beta,
+                                std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                                const std::complex<double>* a, std::int64_t lda, double beta,
+                                std::complex<double>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                float alpha, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                double alpha, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<float> alpha, std::complex<float>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<double> alpha, std::complex<double>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                float alpha, std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                double alpha, std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const float* a,
+                                std::int64_t lda, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const double* a,
+                                std::int64_t lda, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const float* a,
+                                float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const double* a,
+                                double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<float>* a, std::complex<float>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<double>* a, std::complex<double>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, float alpha, const float* x, std::int64_t incx,
+                               float* a, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, double alpha, const double* x, std::int64_t incx,
+                               double* a, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const float** a, std::int64_t* lda, const float** b,
+                                      std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, double* alpha,
+                                      const double** a, std::int64_t* lda, const double** b,
+                                      std::int64_t* ldb, double* beta, double** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      const std::complex<float>** b, std::int64_t* ldb,
+                                      std::complex<float>* beta, std::complex<float>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      const std::complex<double>** b, std::int64_t* ldb,
+                                      std::complex<double>* beta, std::complex<double>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, sycl::half* alpha,
+                                      const sycl::half** a, std::int64_t* lda, const sycl::half** b,
+                                      std::int64_t* ldb, sycl::half* beta, sycl::half** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const sycl::half** a, std::int64_t* lda, const sycl::half** b,
+                                      std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const std::int8_t** a, std::int64_t* lda,
+                                      const std::int8_t** b, std::int64_t* ldb, float* beta,
+                                      float** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* transa, transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const std::int8_t** a, std::int64_t* lda,
+                                      const std::int8_t** b, std::int64_t* ldb, float* beta,
+                                      std::int32_t** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha, const float* a,
+                                      std::int64_t lda, std::int64_t stride_a, const float* b,
+                                      std::int64_t ldb, std::int64_t stride_b, float beta, float* c,
+                                      std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, double alpha, const double* a,
+                                      std::int64_t lda, std::int64_t stride_a, const double* b,
+                                      std::int64_t ldb, std::int64_t stride_b, double beta,
+                                      double* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
+    const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
+    const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
+    std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
+    const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
+    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
+    std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
+    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, sycl::half alpha,
+                                      const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
+                                      const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
+                                      sycl::half beta, sycl::half* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
+                                      const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
+                                      const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose transa, transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
+                                      const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, std::int32_t* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                                const float* a, std::int64_t lda, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                                const double* a, std::int64_t lda, double beta, double* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float> beta, std::complex<float>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, std::int64_t n, std::int64_t k,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double> beta,
+                                std::complex<double>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo* upper_lower, transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, float* alpha, const float** a,
+                                      std::int64_t* lda, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo* upper_lower, transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, double* alpha, const double** a,
+                                      std::int64_t* lda, double* beta, double** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo* upper_lower, transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>* beta, std::complex<float>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo* upper_lower, transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>* beta, std::complex<double>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo upper_lower, transpose trans, std::int64_t n,
+                                      std::int64_t k, float alpha, const float* a, std::int64_t lda,
+                                      std::int64_t stride_a, float beta, float* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo upper_lower, transpose trans, std::int64_t n,
+                                      std::int64_t k, double alpha, const double* a,
+                                      std::int64_t lda, std::int64_t stride_a, double beta,
+                                      double* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo upper_lower, transpose trans, std::int64_t n,
+                                      std::int64_t k, std::complex<float> alpha,
+                                      const std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<float> beta,
+                                      std::complex<float>* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      uplo upper_lower, transpose trans, std::int64_t n,
+                                      std::int64_t k, std::complex<double> alpha,
+                                      const std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<double> beta,
+                                      std::complex<double>* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::int64_t k, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::int64_t k, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
+                               std::int64_t incy, float c, float s,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
+                               std::int64_t incy, double c, double s,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               float* x, std::int64_t incx, float* y, std::int64_t incy, float c,
+                               float s, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               double* x, std::int64_t incx, double* y, std::int64_t incy, double c,
+                               double s, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                float alpha, const float* x, std::int64_t incx, float* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                double alpha, const double* x, std::int64_t incx, double* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* x,
+                                std::int64_t incx, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* x,
+                                std::int64_t incx, std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, float* alpha, const float** x,
+                                      std::int64_t* incx, float** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, double* alpha, const double** x,
+                                      std::int64_t* incx, double** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, std::complex<float>* alpha,
+                                      const std::complex<float>** x, std::int64_t* incx,
+                                      std::complex<float>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, std::complex<double>* alpha,
+                                      const std::complex<double>** x, std::int64_t* incx,
+                                      std::complex<double>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, float alpha, const float* x,
+                                      std::int64_t incx, std::int64_t stridex, float* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, double alpha, const double* x,
+                                      std::int64_t incx, std::int64_t stridex, double* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, std::complex<float> alpha,
+                                      const std::complex<float>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<float>* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, std::complex<double> alpha,
+                                      const std::complex<double>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<double>* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 float alpha, const float* x, std::int64_t incx, float beta,
+                                 float* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 double alpha, const double* x, std::int64_t incx, double beta,
+                                 double* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 std::complex<float> alpha, const std::complex<float>* x,
+                                 std::int64_t incx, std::complex<float> beta,
+                                 std::complex<float>* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 std::complex<double> alpha, const std::complex<double>* x,
+                                 std::int64_t incx, std::complex<double> beta,
+                                 std::complex<double>* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                                 const float* a, std::int64_t lda, const float* b, std::int64_t ldb,
+                                 float beta, float* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k, double alpha,
+                                 const double* a, std::int64_t lda, const double* b,
+                                 std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<float> alpha, const std::complex<float>* a,
+                                 std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<double> alpha, const std::complex<double>* a,
+                                 std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double> beta, std::complex<double>* c,
+                                 std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, float alpha, const float* a,
+                                std::int64_t lda, const float* x, std::int64_t incx, float beta,
+                                float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, double alpha, const double* a,
+                                std::int64_t lda, const double* x, std::int64_t incx, double beta,
+                                double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose trans, std::int64_t m, std::int64_t n, float alpha,
+                                      const float* a, std::int64_t lda, std::int64_t stridea,
+                                      const float* x, std::int64_t incx, std::int64_t stridex,
+                                      float beta, float* y, std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose trans, std::int64_t m, std::int64_t n, double alpha,
+                                      const double* a, std::int64_t lda, std::int64_t stridea,
+                                      const double* x, std::int64_t incx, std::int64_t stridex,
+                                      double beta, double* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+    std::int64_t n, std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
+    std::int64_t stridea, const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
+    std::complex<float> beta, std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
+    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+    std::int64_t n, std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
+    std::int64_t stridea, const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
+    std::complex<double> beta, std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
+    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* trans, std::int64_t* m, std::int64_t* n,
+                                      float* alpha, const float** a, std::int64_t* lda,
+                                      const float** x, std::int64_t* incx, float* beta, float** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* trans, std::int64_t* m, std::int64_t* n,
+                                      double* alpha, const double** a, std::int64_t* lda,
+                                      const double** x, std::int64_t* incx, double* beta,
+                                      double** y, std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* trans, std::int64_t* m, std::int64_t* n,
+                                      std::complex<float>* alpha, const std::complex<float>** a,
+                                      std::int64_t* lda, const std::complex<float>** x,
+                                      std::int64_t* incx, std::complex<float>* beta,
+                                      std::complex<float>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      transpose* trans, std::int64_t* m, std::int64_t* n,
+                                      std::complex<double>* alpha, const std::complex<double>** a,
+                                      std::int64_t* lda, const std::complex<double>** x,
+                                      std::int64_t* incx, std::complex<double>* beta,
+                                      std::complex<double>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, std::int64_t m, std::int64_t n,
+                                      const float* a, std::int64_t lda, std::int64_t stridea,
+                                      const float* x, std::int64_t incx, std::int64_t stridex,
+                                      float* c, std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, std::int64_t m, std::int64_t n,
+                                      const double* a, std::int64_t lda, std::int64_t stridea,
+                                      const double* x, std::int64_t incx, std::int64_t stridex,
+                                      double* c, std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, std::int64_t m, std::int64_t n,
+                                      const std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stridea, const std::complex<float>* x,
+                                      std::int64_t incx, std::int64_t stridex,
+                                      std::complex<float>* c, std::int64_t ldc,
+                                      std::int64_t stridec, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, std::int64_t m, std::int64_t n,
+                                      const std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stridea, const std::complex<double>* x,
+                                      std::int64_t incx, std::int64_t stridex,
+                                      std::complex<double>* c, std::int64_t ldc,
+                                      std::int64_t stridec, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, std::int64_t* m, std::int64_t* n,
+                                      const float** a, std::int64_t* lda, const float** x,
+                                      std::int64_t* incx, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, std::int64_t* m, std::int64_t* n,
+                                      const double** a, std::int64_t* lda, const double** x,
+                                      std::int64_t* incx, double** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, std::int64_t* m, std::int64_t* n,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      const std::complex<float>** x, std::int64_t* incx,
+                                      std::complex<float>** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, std::int64_t* m, std::int64_t* n,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      const std::complex<double>** x, std::int64_t* incx,
+                                      std::complex<double>** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, float alpha, const std::complex<float>* x,
+                               std::int64_t incx, std::complex<float>* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, double alpha, const std::complex<double>* x,
+                               std::int64_t incx, std::complex<double>* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, float alpha, const std::complex<float>* x,
+                               std::int64_t incx, std::complex<float>* a,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, double alpha, const std::complex<double>* x,
+                               std::int64_t incx, std::complex<double>* a,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const float* x, std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const double* x, std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const std::complex<float>* x, std::int64_t incx,
+                                 std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const std::complex<double>* x, std::int64_t incx,
+                                 std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, const std::complex<float>* x,
+                                std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, const std::complex<double>* x,
+                                std::int64_t incx, std::complex<double> beta,
+                                std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, float alpha, const float* a, const float* x,
+                                std::int64_t incx, float beta, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, double alpha, const double* a, const double* x,
+                                std::int64_t incx, double beta, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1,
+                                 float* d2, float* x1, float y1, float* param,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1,
+                                 double* d2, double* x1, double y1, double* param,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                float* x, std::int64_t incx, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                double* x, std::int64_t incx, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<float>* x, std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<double>* x, std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const float* x, std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const double* x, std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                                 float alpha, const float* a, std::int64_t lda, const float* b,
+                                 std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                                 double alpha, const double* a, std::int64_t lda, const double* b,
+                                 std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                                 std::complex<float> alpha, const std::complex<float>* a,
+                                 std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose transa, transpose transb, std::int64_t n, std::int64_t k,
+                                 std::complex<double> alpha, const std::complex<double>* a,
+                                 std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double> beta, std::complex<double>* c,
+                                 std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                float alpha, const float* a, std::int64_t lda, const float* b,
+                                std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                double alpha, const double* a, std::int64_t lda, const double* b,
+                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::half alpha, const sycl::half* a, std::int64_t lda,
+                                const sycl::half* b, std::int64_t ldb, sycl::half beta,
+                                sycl::half* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                float alpha, const sycl::half* a, std::int64_t lda,
+                                const sycl::half* b, std::int64_t ldb, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                                transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                                float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b,
+                                std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_bias(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+    const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b,
+    std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
+    const std::int32_t* co,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_bias(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+    const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b,
+    std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
+    const std::int32_t* co,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_bias(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+    const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b,
+    std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
+    const std::int32_t* co,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gemm_bias(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+    const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b,
+    std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
+    const std::int32_t* co,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, float alpha, const float* x, std::int64_t incx,
+                                const float* y, std::int64_t incy, float* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, double alpha, const double* x, std::int64_t incx,
+                                const double* y, std::int64_t incy, double* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                               std::int64_t n, float alpha, const float* x, std::int64_t incx,
+                               const float* y, std::int64_t incy, float* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                               std::int64_t n, double alpha, const double* x, std::int64_t incx,
+                               const double* y, std::int64_t incy, double* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, float alpha, const float* a, std::int64_t lda,
+                                float* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, double alpha, const double* a, std::int64_t lda,
+                                double* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, uplo upper_lower, transpose trans,
+                                      diag unit_diag, int64_t m, int64_t n, float alpha,
+                                      const float* a, int64_t lda, int64_t stride_a, float* b,
+                                      int64_t ldb, int64_t stride_b, int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, uplo upper_lower, transpose trans,
+                                      diag unit_diag, int64_t m, int64_t n, double alpha,
+                                      const double* a, int64_t lda, int64_t stride_a, double* b,
+                                      int64_t ldb, int64_t stride_b, int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, uplo upper_lower, transpose trans,
+                                      diag unit_diag, int64_t m, int64_t n,
+                                      std::complex<float> alpha, const std::complex<float>* a,
+                                      int64_t lda, int64_t stride_a, std::complex<float>* b,
+                                      int64_t ldb, int64_t stride_b, int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side left_right, uplo upper_lower, transpose trans,
+                                      diag unit_diag, int64_t m, int64_t n,
+                                      std::complex<double> alpha, const std::complex<double>* a,
+                                      int64_t lda, int64_t stride_a, std::complex<double>* b,
+                                      int64_t ldb, int64_t stride_b, int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, uplo* upper_lower, transpose* trans,
+                                      diag* unit_diag, int64_t* m, int64_t* n, float* alpha,
+                                      const float** a, int64_t* lda, float** b, int64_t* ldb,
+                                      int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, uplo* upper_lower, transpose* trans,
+                                      diag* unit_diag, int64_t* m, int64_t* n, double* alpha,
+                                      const double** a, int64_t* lda, double** b, int64_t* ldb,
+                                      int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, uplo* upper_lower, transpose* trans,
+                                      diag* unit_diag, int64_t* m, int64_t* n,
+                                      std::complex<float>* alpha, const std::complex<float>** a,
+                                      int64_t* lda, std::complex<float>** b, int64_t* ldb,
+                                      int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      side* left_right, uplo* upper_lower, transpose* trans,
+                                      diag* unit_diag, int64_t* m, int64_t* n,
+                                      std::complex<double>* alpha, const std::complex<double>** a,
+                                      int64_t* lda, std::complex<double>** b, int64_t* ldb,
+                                      int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                float alpha, const float* a, std::int64_t lda, const float* x,
+                                std::int64_t incx, float beta, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                double alpha, const double* a, std::int64_t lda, const double* x,
+                                std::int64_t incx, double beta, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const float* a, std::int64_t lda, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const double* a, std::int64_t lda, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n, float alpha,
+                                const float* a, std::int64_t lda, const float* b, std::int64_t ldb,
+                                float beta, float* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n, double alpha,
+                                const double* a, std::int64_t lda, const double* b,
+                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<float>* x, std::int64_t incx,
+                                const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<double>* x, std::int64_t incx,
+                                const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, float alpha, const float* x, std::int64_t incx,
+                               float* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                               std::int64_t n, double alpha, const double* x, std::int64_t incx,
+                               double* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, float alpha, const float* a, std::int64_t lda,
+                                float* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, double alpha, const double* a, std::int64_t lda,
+                                double* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right,
+                                uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, float alpha, const float* a, std::int64_t lda,
+                                const float* x, std::int64_t incx, float beta, float* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, double alpha, const double* a, std::int64_t lda,
+                                const double* x, std::int64_t incx, double beta, double* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const float* a,
+                                float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const double* a,
+                                double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<float>* a, std::complex<float>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<double>* a, std::complex<double>* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const float* a,
+                                std::int64_t lda, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, const double* a,
+                                std::int64_t lda, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const float* x, std::int64_t incx, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const double* x, std::int64_t incx, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, const float** x, std::int64_t* incx,
+                                      float** y, std::int64_t* incy, int64_t group_count,
+                                      int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, const double** x, std::int64_t* incx,
+                                      double** y, std::int64_t* incy, int64_t group_count,
+                                      int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, const std::complex<float>** x,
+                                      std::int64_t* incx, std::complex<float>** y,
+                                      std::int64_t* incy, int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t* n, const std::complex<double>** x,
+                                      std::int64_t* incx, std::complex<double>** y,
+                                      std::int64_t* incy, int64_t group_count, int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, const float* x, std::int64_t incx,
+                                      std::int64_t stridex, float* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, const double* x, std::int64_t incx,
+                                      std::int64_t stridex, double* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, const std::complex<float>* x,
+                                      std::int64_t incx, std::int64_t stridex,
+                                      std::complex<float>* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                      std::int64_t n, const std::complex<double>* x,
+                                      std::int64_t incx, std::int64_t stridex,
+                                      std::complex<double>* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const float* x, std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const double* x, std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const std::complex<float>* x, std::int64_t incx,
+                                 std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 const std::complex<double>* x, std::int64_t incx,
+                                 std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::int64_t k, float alpha, const float* a,
+                                std::int64_t lda, const float* x, std::int64_t incx, float beta,
+                                float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, std::int64_t k, double alpha, const double* a,
+                                std::int64_t lda, const double* x, std::int64_t incx, double beta,
+                                double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<float>* x, std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const std::complex<double>* x, std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const float* x, std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                const double* x, std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const float* a, std::int64_t lda, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const double* a, std::int64_t lda, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
+                                const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, float alpha, const float* x, std::int64_t incx,
+                                const float* y, std::int64_t incy, float* a,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                std::int64_t n, double alpha, const double* x, std::int64_t incx,
+                                const double* y, std::int64_t incy, double* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                float* x, std::int64_t incx, float* y, std::int64_t incy,
+                                float* param, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                double* x, std::int64_t incx, double* y, std::int64_t incy,
+                                double* param, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               const float* x, std::int64_t incx, const float* y, std::int64_t incy,
+                               float* result, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               const double* x, std::int64_t incx, const double* y,
+                               std::int64_t incy, double* result,
+                               const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                               const float* x, std::int64_t incx, const float* y, std::int64_t incy,
+                               double* result, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                  float sb, const float* x, std::int64_t incx, const float* y,
+                                  std::int64_t incy, float* result,
+                                  const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<float> alpha, const std::complex<float>* a,
+                                 std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                 float beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                                 transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<double> alpha, const std::complex<double>* a,
+                                 std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                 double beta, std::complex<double>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b,
+                                float* c, float* s,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a,
+                                double* b, double* c, double* s,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue,
+                                std::complex<float>* a, std::complex<float>* b, float* c,
+                                std::complex<float>* s,
+                                const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue,
+                                std::complex<double>* a, std::complex<double>* b, double* c,
+                                std::complex<double>* s,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          float alpha, const float* a, std::int64_t lda,
+                                          std::int64_t stride_a, float* b, std::int64_t ldb,
+                                          std::int64_t stride_b, std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          double alpha, const double* a, std::int64_t lda,
+                                          std::int64_t stride_a, double* b, std::int64_t ldb,
+                                          std::int64_t stride_b, std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          std::complex<float> alpha, const std::complex<float>* a,
+                                          std::int64_t lda, std::int64_t stride_a,
+                                          std::complex<float>* b, std::int64_t ldb,
+                                          std::int64_t stride_b, std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          std::complex<double> alpha, const std::complex<double>* a,
+                                          std::int64_t lda, std::int64_t stride_a,
+                                          std::complex<double>* b, std::int64_t ldb,
+                                          std::int64_t stride_b, std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          float alpha, float* ab, std::int64_t lda,
+                                          std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          double alpha, double* ab, std::int64_t lda,
+                                          std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          std::complex<float> alpha, std::complex<float>* ab,
+                                          std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose trans, std::int64_t m, std::int64_t n,
+                                          std::complex<double> alpha, std::complex<double>* ab,
+                                          std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                         transpose transa, transpose transb, std::int64_t m,
+                                         std::int64_t n, float alpha, const float* a,
+                                         std::int64_t lda, std::int64_t stride_a, float beta,
+                                         const float* b, std::int64_t ldb, std::int64_t stride_b,
+                                         float* c, std::int64_t ldc, std::int64_t stride_c,
+                                         std::int64_t batch_size,
+                                         const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                         transpose transa, transpose transb, std::int64_t m,
+                                         std::int64_t n, double alpha, const double* a,
+                                         std::int64_t lda, std::int64_t stride_a, double beta,
+                                         const double* b, std::int64_t ldb, std::int64_t stride_b,
+                                         double* c, std::int64_t ldc, std::int64_t stride_c,
+                                         std::int64_t batch_size,
+                                         const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
+    std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, const std::complex<float>* b,
+    std::int64_t ldb, std::int64_t stride_b, std::complex<float>* c, std::int64_t ldc,
+    std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd_batch(
+    oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+    std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
+    std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
+    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<double>* c,
+    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n, float alpha,
+                                    const float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n, double alpha,
+                                    const double* a, std::int64_t lda, double* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n,
+                                    std::complex<float> alpha, const std::complex<float>* a,
+                                    std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n,
+                                    std::complex<double> alpha, const std::complex<double>* a,
+                                    std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue,
+                                     transpose trans, std::int64_t m, std::int64_t n, float alpha,
+                                     const float* a, std::int64_t lda, std::int64_t stridea,
+                                     float* b, std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue,
+                                     transpose trans, std::int64_t m, std::int64_t n, double alpha,
+                                     const double* a, std::int64_t lda, std::int64_t stridea,
+                                     double* b, std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue,
+                                     transpose trans, std::int64_t m, std::int64_t n,
+                                     std::complex<float> alpha, const std::complex<float>* a,
+                                     std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
+                                     std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue,
+                                     transpose trans, std::int64_t m, std::int64_t n,
+                                     std::complex<double> alpha, const std::complex<double>* a,
+                                     std::int64_t lda, std::int64_t stridea,
+                                     std::complex<double>* b, std::int64_t ldb,
+                                     std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n, float alpha,
+                                    float* ab, std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n, double alpha,
+                                    double* ab, std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n,
+                                    std::complex<float> alpha, std::complex<float>* ab,
+                                    std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue,
+                                    transpose trans, std::int64_t m, std::int64_t n,
+                                    std::complex<double> alpha, std::complex<double>* ab,
+                                    std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue,
+                                   transpose transa, transpose transb, std::int64_t m,
+                                   std::int64_t n, float alpha, const float* a, std::int64_t lda,
+                                   float beta, const float* b, std::int64_t ldb, float* c,
+                                   std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue,
+                                   transpose transa, transpose transb, std::int64_t m,
+                                   std::int64_t n, double alpha, const double* a, std::int64_t lda,
+                                   double beta, const double* b, std::int64_t ldb, double* c,
+                                   std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue,
+                                   transpose transa, transpose transb, std::int64_t m,
+                                   std::int64_t n, std::complex<float> alpha,
+                                   const std::complex<float>* a, std::int64_t lda,
+                                   std::complex<float> beta, const std::complex<float>* b,
+                                   std::int64_t ldb, std::complex<float>* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue,
+                                   transpose transa, transpose transb, std::int64_t m,
+                                   std::int64_t n, std::complex<double> alpha,
+                                   const std::complex<double>* a, std::int64_t lda,
+                                   std::complex<double> beta, const std::complex<double>* b,
+                                   std::int64_t ldb, std::complex<double>* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          float* alpha, const float** a, std::int64_t* lda,
+                                          float** b, std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          double* alpha, const double** a, std::int64_t* lda,
+                                          double** b, std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          std::complex<float>* alpha, const std::complex<float>** a,
+                                          std::int64_t* lda, std::complex<float>** b,
+                                          std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          std::complex<double>* alpha,
+                                          const std::complex<double>** a, std::int64_t* lda,
+                                          std::complex<double>** b, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          float* alpha, float** ab, std::int64_t* lda,
+                                          std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          double* alpha, double** ab, std::int64_t* lda,
+                                          std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          std::complex<float>* alpha, std::complex<float>** ab,
+                                          std::int64_t* lda, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                          transpose* trans, std::int64_t* m, std::int64_t* n,
+                                          std::complex<double>* alpha, std::complex<double>** ab,
+                                          std::int64_t* lda, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
diff --git a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hpp b/include/oneapi/math/blas/detail/cublas/blas_ct.hpp
similarity index 85%
rename from include/oneapi/mkl/blas/detail/cublas/blas_ct.hpp
rename to include/oneapi/math/blas/detail/cublas/blas_ct.hpp
index 2443c64d2..6f3384ecd 100644
--- a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/cublas/blas_ct.hpp
@@ -28,13 +28,13 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -51,7 +51,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_CUBLAS_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx b/include/oneapi/math/blas/detail/cublas/blas_ct.hxx
similarity index 72%
rename from include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx
rename to include/oneapi/math/blas/detail/cublas/blas_ct.hxx
index d5678917e..4a0898f9a 100644
--- a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/cublas/blas_ct.hxx
@@ -20,112 +20,112 @@
 void herk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void spr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void spr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -134,9 +134,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -145,9 +145,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -156,9 +156,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -167,9 +167,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -178,9 +178,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -189,9 +189,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -200,9 +200,9 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -211,57 +211,57 @@ void gemm_batch(backend_selector<backend::cublas> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -269,9 +269,9 @@ void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -279,181 +279,186 @@ void syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void her2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void her2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::cublas> selector, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::cublas> selector, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void sdsdot(backend_selector<backend::cublas> selector, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                              result);
 }
 
 void gerc(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void gerc(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -461,8 +466,8 @@ void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -470,40 +475,40 @@ void syr2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -511,9 +516,9 @@ void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -521,9 +526,9 @@ void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -532,9 +537,9 @@ void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -543,9 +548,9 @@ void gemv_batch(backend_selector<backend::cublas> selector, transpose trans, std
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std::int64_t m,
@@ -553,9 +558,9 @@ void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std::int64_t m,
@@ -563,9 +568,9 @@ void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std::int64_t m,
@@ -573,9 +578,9 @@ void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std::int64_t m,
@@ -583,87 +588,87 @@ void dgmm_batch(backend_selector<backend::cublas> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void her(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void her(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void hpr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void hpr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void iamin(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::cublas> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::cublas> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -671,8 +676,9 @@ void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -680,8 +686,9 @@ void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -689,8 +696,9 @@ void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -698,84 +706,85 @@ void gemm_bias(backend_selector<backend::cublas> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void swap(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void geru(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void nrm2(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -783,8 +792,8 @@ void gemm(backend_selector<backend::cublas> selector, transpose transa, transpos
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -792,106 +801,106 @@ void gemm(backend_selector<backend::cublas> selector, transpose transa, transpos
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void syr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void ger(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void ger(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void trsm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -899,8 +908,8 @@ void hemm(backend_selector<backend::cublas> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -908,40 +917,40 @@ void hemm(backend_selector<backend::cublas> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void hpr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -949,8 +958,8 @@ void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int6
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -958,52 +967,52 @@ void gbmv(backend_selector<backend::cublas> selector, transpose trans, std::int6
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1011,8 +1020,8 @@ void symm(backend_selector<backend::cublas> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1020,222 +1029,222 @@ void symm(backend_selector<backend::cublas> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void syr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void trmm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::cublas> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::cublas> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::cublas> selector, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpose transa,
@@ -1243,8 +1252,8 @@ void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpose transa,
@@ -1252,140 +1261,140 @@ void gemmt(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::cublas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void spr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void iamax(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::cublas> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::cublas> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::cublas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1393,9 +1402,9 @@ void trsm_batch(backend_selector<backend::cublas> selector, side left_right, upl
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1403,9 +1412,9 @@ void trsm_batch(backend_selector<backend::cublas> selector, side left_right, upl
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1413,9 +1422,9 @@ void trsm_batch(backend_selector<backend::cublas> selector, side left_right, upl
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::cublas> selector, side left_right, uplo upper_lower,
@@ -1423,9 +1432,9 @@ void trsm_batch(backend_selector<backend::cublas> selector, side left_right, upl
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -1433,8 +1442,8 @@ void her2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
@@ -1442,60 +1451,60 @@ void her2k(backend_selector<backend::cublas> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::cublas> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::cublas> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::cublas> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::cublas> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -1503,8 +1512,8 @@ void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
@@ -1512,38 +1521,38 @@ void omatcopy_batch(backend_selector<backend::cublas> selector, transpose trans,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1551,9 +1560,9 @@ void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1561,9 +1570,9 @@ void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1572,9 +1581,9 @@ void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1584,113 +1593,113 @@ void omatadd_batch(backend_selector<backend::cublas> selector, transpose transa,
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void omatadd(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1698,8 +1707,8 @@ void omatadd(backend_selector<backend::cublas> selector, transpose transa, trans
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::cublas> selector, transpose transa, transpose transb,
@@ -1707,8 +1716,8 @@ void omatadd(backend_selector<backend::cublas> selector, transpose transa, trans
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1716,8 +1725,8 @@ void omatadd(backend_selector<backend::cublas> selector, transpose transa, trans
 sycl::event syr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1725,70 +1734,70 @@ sycl::event syr2(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n, float alpha, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1796,8 +1805,8 @@ sycl::event trmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1805,24 +1814,24 @@ sycl::event trmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1830,8 +1839,8 @@ sycl::event tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1839,24 +1848,24 @@ sycl::event tpmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
@@ -1865,8 +1874,8 @@ sycl::event hpmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1875,8 +1884,8 @@ sycl::event hpmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1884,7 +1893,7 @@ sycl::event syrk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1893,7 +1902,7 @@ sycl::event syrk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1903,7 +1912,7 @@ sycl::event syrk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1913,7 +1922,7 @@ sycl::event syrk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1923,7 +1932,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo* upper_l
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1934,7 +1943,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo* upper_l
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1946,7 +1955,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo* upper_l
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1958,7 +1967,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo* upper_l
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1969,7 +1978,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lo
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1980,7 +1989,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lo
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1992,7 +2001,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lo
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2004,7 +2013,7 @@ sycl::event syrk_batch(backend_selector<backend::cublas> selector, uplo upper_lo
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2014,8 +2023,8 @@ sycl::event her2(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2023,8 +2032,8 @@ sycl::event her2(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2034,8 +2043,8 @@ sycl::event hbmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2045,56 +2054,56 @@ sycl::event hbmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<float>* x,
                 std::int64_t incx, std::complex<float>* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<double>* x,
                 std::int64_t incx, std::complex<double>* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::cublas> selector, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::cublas> selector, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2102,8 +2111,8 @@ sycl::event axpy(backend_selector<backend::cublas> selector, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2111,8 +2120,8 @@ sycl::event axpy(backend_selector<backend::cublas> selector, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2120,7 +2129,7 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2129,7 +2138,7 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2139,7 +2148,7 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2149,7 +2158,7 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2158,9 +2167,9 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2168,9 +2177,9 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2179,9 +2188,9 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2190,25 +2199,25 @@ sycl::event axpy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::cublas> selector, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::cublas> selector, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2216,8 +2225,8 @@ sycl::event axpby(backend_selector<backend::cublas> selector, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2225,8 +2234,8 @@ sycl::event axpby(backend_selector<backend::cublas> selector, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2234,8 +2243,8 @@ sycl::event gerc(backend_selector<backend::cublas> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2243,8 +2252,8 @@ sycl::event gerc(backend_selector<backend::cublas> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2253,8 +2262,8 @@ sycl::event syr2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2263,8 +2272,8 @@ sycl::event syr2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2274,8 +2283,8 @@ sycl::event syr2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2285,8 +2294,8 @@ sycl::event syr2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2294,8 +2303,8 @@ sycl::event gemv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2303,8 +2312,8 @@ sycl::event gemv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2313,8 +2322,8 @@ sycl::event gemv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2323,8 +2332,8 @@ sycl::event gemv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2334,7 +2343,7 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2346,7 +2355,7 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2358,7 +2367,7 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2370,7 +2379,7 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2381,9 +2390,9 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2392,9 +2401,9 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2404,9 +2413,9 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose* tr
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2417,9 +2426,9 @@ sycl::event gemv_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2428,7 +2437,7 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side left_rig
                        const float* x, std::int64_t incx, std::int64_t stridex, float* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2439,7 +2448,7 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side left_rig
                        const double* x, std::int64_t incx, std::int64_t stridex, double* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2451,7 +2460,7 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2463,7 +2472,7 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t stridex, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2474,9 +2483,9 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2485,9 +2494,9 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2496,9 +2505,9 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2507,9 +2516,9 @@ sycl::event dgmm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2517,8 +2526,8 @@ sycl::event her(backend_selector<backend::cublas> selector, uplo upper_lower, st
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -2526,56 +2535,56 @@ sycl::event her(backend_selector<backend::cublas> selector, uplo upper_lower, st
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::cublas> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::cublas> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
@@ -2585,7 +2594,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2597,7 +2606,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2609,7 +2618,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2622,7 +2631,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2634,7 +2643,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2646,7 +2655,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2658,7 +2667,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2670,7 +2679,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2682,7 +2691,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2694,7 +2703,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2707,7 +2716,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2720,7 +2729,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2733,7 +2742,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2745,7 +2754,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2757,7 +2766,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2769,7 +2778,7 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2778,48 +2787,48 @@ sycl::event gemm_batch(backend_selector<backend::cublas> selector, transpose tra
 sycl::event spmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event spmv(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::cublas> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::cublas> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::cublas> selector, std::int64_t n, std::complex<float>* x,
                  std::int64_t incx, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::cublas> selector, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
@@ -2827,8 +2836,8 @@ sycl::event geru(backend_selector<backend::cublas> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2836,38 +2845,38 @@ sycl::event geru(backend_selector<backend::cublas> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
@@ -2876,8 +2885,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2886,8 +2895,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2897,8 +2906,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2908,8 +2917,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2919,8 +2928,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2929,8 +2938,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2939,8 +2948,8 @@ sycl::event gemm(backend_selector<backend::cublas> selector, transpose transa, t
                  std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2950,7 +2959,7 @@ sycl::event gemm_bias(backend_selector<backend::cublas> selector, transpose tran
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2962,7 +2971,7 @@ sycl::event gemm_bias(backend_selector<backend::cublas> selector, transpose tran
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2974,7 +2983,7 @@ sycl::event gemm_bias(backend_selector<backend::cublas> selector, transpose tran
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2986,7 +2995,7 @@ sycl::event gemm_bias(backend_selector<backend::cublas> selector, transpose tran
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2996,7 +3005,7 @@ sycl::event herk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::herk(
+    auto done = oneapi::math::blas::cublas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3005,7 +3014,7 @@ sycl::event herk(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::herk(
+    auto done = oneapi::math::blas::cublas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3013,8 +3022,8 @@ sycl::event herk(backend_selector<backend::cublas> selector, uplo upper_lower, t
 sycl::event ger(backend_selector<backend::cublas> selector, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3022,8 +3031,8 @@ sycl::event ger(backend_selector<backend::cublas> selector, std::int64_t m, std:
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3031,9 +3040,9 @@ sycl::event trsm(backend_selector<backend::cublas> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3041,9 +3050,9 @@ sycl::event trsm(backend_selector<backend::cublas> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3052,9 +3061,9 @@ sycl::event trsm(backend_selector<backend::cublas> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3063,9 +3072,9 @@ sycl::event trsm(backend_selector<backend::cublas> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3074,7 +3083,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3085,7 +3094,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3097,7 +3106,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3109,7 +3118,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3120,7 +3129,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3131,7 +3140,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3143,7 +3152,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3155,7 +3164,7 @@ sycl::event trsm_batch(backend_selector<backend::cublas> selector, side* left_ri
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3165,8 +3174,8 @@ sycl::event dotu(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3174,8 +3183,8 @@ sycl::event dotu(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3185,8 +3194,8 @@ sycl::event hemm(backend_selector<backend::cublas> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3196,8 +3205,8 @@ sycl::event hemm(backend_selector<backend::cublas> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3205,8 +3214,8 @@ sycl::event hpr2(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3214,8 +3223,8 @@ sycl::event hpr2(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3224,8 +3233,8 @@ sycl::event gbmv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3234,8 +3243,8 @@ sycl::event gbmv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3245,8 +3254,8 @@ sycl::event gbmv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3256,15 +3265,15 @@ sycl::event gbmv(backend_selector<backend::cublas> selector, transpose trans, st
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3272,7 +3281,7 @@ sycl::event tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
 sycl::event tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3281,7 +3290,7 @@ sycl::event tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3290,7 +3299,7 @@ sycl::event tbmv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3300,8 +3309,8 @@ sycl::event symm(backend_selector<backend::cublas> selector, side left_right, up
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3310,8 +3319,8 @@ sycl::event symm(backend_selector<backend::cublas> selector, side left_right, up
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3321,8 +3330,8 @@ sycl::event symm(backend_selector<backend::cublas> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3332,8 +3341,8 @@ sycl::event symm(backend_selector<backend::cublas> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3341,8 +3350,8 @@ sycl::event dotc(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3350,24 +3359,24 @@ sycl::event dotc(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -3375,9 +3384,9 @@ sycl::event trmm(backend_selector<backend::cublas> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3385,9 +3394,9 @@ sycl::event trmm(backend_selector<backend::cublas> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3396,9 +3405,9 @@ sycl::event trmm(backend_selector<backend::cublas> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3407,39 +3416,39 @@ sycl::event trmm(backend_selector<backend::cublas> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::cublas> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::cublas> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3447,8 +3456,8 @@ sycl::event tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3456,24 +3465,24 @@ sycl::event tpsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3481,8 +3490,8 @@ sycl::event trsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3490,47 +3499,47 @@ sycl::event trsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t* n, const float** x,
                        std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3539,7 +3548,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3548,7 +3557,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3557,7 +3566,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t*
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3566,7 +3575,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3575,7 +3584,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3584,7 +3593,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3593,7 +3602,7 @@ sycl::event copy_batch(backend_selector<backend::cublas> selector, std::int64_t
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3603,7 +3612,7 @@ sycl::event hemv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hemv(
+    auto done = oneapi::math::blas::cublas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3613,7 +3622,7 @@ sycl::event hemv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::hemv(
+    auto done = oneapi::math::blas::cublas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3622,9 +3631,9 @@ sycl::event gemmt(backend_selector<backend::cublas> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a,
                   std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3632,9 +3641,9 @@ sycl::event gemmt(backend_selector<backend::cublas> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a,
                   std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3643,9 +3652,9 @@ sycl::event gemmt(backend_selector<backend::cublas> selector, uplo upper_lower,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3654,9 +3663,9 @@ sycl::event gemmt(backend_selector<backend::cublas> selector, uplo upper_lower,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3665,8 +3674,8 @@ sycl::event sbmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3675,45 +3684,45 @@ sycl::event sbmv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::cublas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3721,7 +3730,7 @@ sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
 sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3730,7 +3739,7 @@ sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3739,7 +3748,7 @@ sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::cublas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3747,78 +3756,78 @@ sycl::event tbsv(backend_selector<backend::cublas> selector, uplo upper_lower, t
 sycl::event spr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::cublas> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::cublas> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::cublas> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::cublas> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::cublas> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::cublas> selector, float* a, float* b, float* c, float* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::cublas> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3826,7 +3835,7 @@ sycl::event rotg(backend_selector<backend::cublas> selector, std::complex<float>
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3834,15 +3843,15 @@ sycl::event rotg(backend_selector<backend::cublas> selector, std::complex<double
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::cublas> selector, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                         incy, result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                          incy, result, dependencies);
     return done;
 }
 
@@ -3852,8 +3861,8 @@ sycl::event her2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3863,32 +3872,32 @@ sycl::event her2k(backend_selector<backend::cublas> selector, uplo upper_lower,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::cublas> selector, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::cublas> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
@@ -3896,7 +3905,7 @@ sycl::event symv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::symv(
+    auto done = oneapi::math::blas::cublas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3905,7 +3914,7 @@ sycl::event symv(backend_selector<backend::cublas> selector, uplo upper_lower, s
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::symv(
+    auto done = oneapi::math::blas::cublas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3915,7 +3924,7 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3926,7 +3935,7 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3937,7 +3946,7 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3948,7 +3957,7 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3958,7 +3967,7 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3967,7 +3976,7 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3977,7 +3986,7 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3987,7 +3996,7 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3998,7 +4007,7 @@ sycl::event omatadd_batch(backend_selector<backend::cublas> selector, transpose
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4010,7 +4019,7 @@ sycl::event omatadd_batch(backend_selector<backend::cublas> selector, transpose
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4023,7 +4032,7 @@ sycl::event omatadd_batch(backend_selector<backend::cublas> selector, transpose
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4036,7 +4045,7 @@ sycl::event omatadd_batch(backend_selector<backend::cublas> selector, transpose
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4045,16 +4054,16 @@ sycl::event omatadd_batch(backend_selector<backend::cublas> selector, transpose
 sycl::event omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4062,8 +4071,8 @@ sycl::event omatcopy(backend_selector<backend::cublas> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4071,8 +4080,8 @@ sycl::event omatcopy(backend_selector<backend::cublas> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4080,7 +4089,7 @@ sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose tran
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4089,7 +4098,7 @@ sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose tran
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4099,7 +4108,7 @@ sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4109,7 +4118,7 @@ sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4117,16 +4126,16 @@ sycl::event omatcopy2(backend_selector<backend::cublas> selector, transpose tran
 sycl::event imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::cublas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4134,8 +4143,8 @@ sycl::event imatcopy(backend_selector<backend::cublas> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4143,8 +4152,8 @@ sycl::event imatcopy(backend_selector<backend::cublas> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4152,9 +4161,9 @@ sycl::event omatadd(backend_selector<backend::cublas> selector, transpose transa
                     std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                     float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4162,9 +4171,9 @@ sycl::event omatadd(backend_selector<backend::cublas> selector, transpose transa
                     std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                     double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4173,9 +4182,9 @@ sycl::event omatadd(backend_selector<backend::cublas> selector, transpose transa
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4184,9 +4193,9 @@ sycl::event omatadd(backend_selector<backend::cublas> selector, transpose transa
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4195,9 +4204,9 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4206,9 +4215,9 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4217,9 +4226,9 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4228,9 +4237,9 @@ sycl::event omatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4238,9 +4247,9 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4248,9 +4257,9 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4259,9 +4268,9 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4270,8 +4279,8 @@ sycl::event imatcopy_batch(backend_selector<backend::cublas> selector, transpose
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
diff --git a/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp b/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp
similarity index 76%
rename from include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp
rename to include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp
index 8a6d5448f..1b86c5bb6 100644
--- a/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp
+++ b/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp
@@ -16,8 +16,8 @@
 *  limitations under the License.
 *
 **************************************************************************/
-#ifndef _ONEMKL_BLAS_CUBLAS_HPP_
-#define _ONEMKL_BLAS_CUBLAS_HPP_
+#ifndef _ONEMATH_BLAS_CUBLAS_HPP_
+#define _ONEMATH_BLAS_CUBLAS_HPP_
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
@@ -26,30 +26,30 @@
 #include <complex>
 #include <cstdint>
 #include <string>
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
-using oneapi::mkl::diag;
-using oneapi::mkl::offset;
-using oneapi::mkl::side;
-using oneapi::mkl::transpose;
-using oneapi::mkl::uplo;
+namespace math {
+using oneapi::math::diag;
+using oneapi::math::offset;
+using oneapi::math::side;
+using oneapi::math::transpose;
+using oneapi::math::uplo;
 namespace blas {
 namespace cublas {
 namespace column_major {
 
-#include "onemkl_blas_cublas.hxx"
+#include "onemath_blas_cublas.hxx"
 
 } //namespace column_major
 namespace row_major {
 
-#include "onemkl_blas_cublas.hxx"
+#include "onemath_blas_cublas.hxx"
 
 } //namespace row_major
 } //namespace cublas
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BLAS_CUBLAS_HPP_
+#endif //_ONEMATH_BLAS_CUBLAS_HPP_
diff --git a/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx b/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx
similarity index 100%
rename from include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx
rename to include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx
diff --git a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hpp b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hpp
similarity index 84%
rename from include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hpp
rename to include/oneapi/math/blas/detail/mklcpu/blas_ct.hpp
index 24b0a9c7e..fc9367ed1 100644
--- a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hpp
@@ -28,14 +28,14 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -52,7 +52,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_MKLCPU_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx
similarity index 72%
rename from include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx
rename to include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx
index 38123485e..dbc1554fe 100644
--- a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx
@@ -22,112 +22,112 @@
 void herk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void spr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void spr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -136,9 +136,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -147,9 +147,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -158,9 +158,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -169,9 +169,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -180,9 +180,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -191,9 +191,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -202,9 +202,9 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -213,57 +213,57 @@ void gemm_batch(backend_selector<backend::mklcpu> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -271,9 +271,9 @@ void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -281,181 +281,186 @@ void syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void her2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void her2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::mklcpu> selector, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklcpu> selector, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void sdsdot(backend_selector<backend::mklcpu> selector, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                              result);
 }
 
 void gerc(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void gerc(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -463,8 +468,8 @@ void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -472,40 +477,40 @@ void syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -513,9 +518,9 @@ void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -523,9 +528,9 @@ void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -534,9 +539,9 @@ void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -545,9 +550,9 @@ void gemv_batch(backend_selector<backend::mklcpu> selector, transpose trans, std
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std::int64_t m,
@@ -555,9 +560,9 @@ void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std::int64_t m,
@@ -565,9 +570,9 @@ void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std::int64_t m,
@@ -575,9 +580,9 @@ void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std::int64_t m,
@@ -585,87 +590,87 @@ void dgmm_batch(backend_selector<backend::mklcpu> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void her(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void her(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void hpr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void hpr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void iamin(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklcpu> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklcpu> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -673,8 +678,9 @@ void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -682,8 +688,9 @@ void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -691,8 +698,9 @@ void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -700,84 +708,85 @@ void gemm_bias(backend_selector<backend::mklcpu> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void swap(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void geru(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -785,8 +794,8 @@ void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpos
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -794,106 +803,106 @@ void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpos
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void syr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void ger(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void ger(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void trsm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -901,8 +910,8 @@ void hemm(backend_selector<backend::mklcpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -910,40 +919,40 @@ void hemm(backend_selector<backend::mklcpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void hpr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -951,8 +960,8 @@ void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int6
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -960,52 +969,52 @@ void gbmv(backend_selector<backend::mklcpu> selector, transpose trans, std::int6
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1013,8 +1022,8 @@ void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1022,222 +1031,222 @@ void symm(backend_selector<backend::mklcpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void syr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void trmm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::mklcpu> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::mklcpu> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose transa,
@@ -1245,8 +1254,8 @@ void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose transa,
@@ -1254,140 +1263,140 @@ void gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklcpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void spr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void iamax(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklcpu> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklcpu> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::mklcpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1395,9 +1404,9 @@ void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, upl
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1405,9 +1414,9 @@ void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, upl
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1415,9 +1424,9 @@ void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, upl
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, uplo upper_lower,
@@ -1425,9 +1434,9 @@ void trsm_batch(backend_selector<backend::mklcpu> selector, side left_right, upl
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -1435,8 +1444,8 @@ void her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
@@ -1444,60 +1453,60 @@ void her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::mklcpu> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklcpu> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklcpu> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklcpu> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -1505,8 +1514,8 @@ void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
@@ -1514,38 +1523,38 @@ void omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1553,9 +1562,9 @@ void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1563,9 +1572,9 @@ void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1574,9 +1583,9 @@ void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1586,113 +1595,113 @@ void omatadd_batch(backend_selector<backend::mklcpu> selector, transpose transa,
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy2(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1700,8 +1709,8 @@ void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, trans
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, transpose transb,
@@ -1709,8 +1718,8 @@ void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, trans
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1718,8 +1727,8 @@ void omatadd(backend_selector<backend::mklcpu> selector, transpose transa, trans
 sycl::event syr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1727,70 +1736,70 @@ sycl::event syr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1798,8 +1807,8 @@ sycl::event trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1807,24 +1816,24 @@ sycl::event trmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1832,8 +1841,8 @@ sycl::event tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1841,24 +1850,24 @@ sycl::event tpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
@@ -1867,8 +1876,8 @@ sycl::event hpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1877,8 +1886,8 @@ sycl::event hpmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1886,7 +1895,7 @@ sycl::event syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1895,7 +1904,7 @@ sycl::event syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1905,7 +1914,7 @@ sycl::event syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1915,7 +1924,7 @@ sycl::event syrk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1925,7 +1934,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo* upper_l
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1936,7 +1945,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo* upper_l
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1948,7 +1957,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo* upper_l
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1960,7 +1969,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo* upper_l
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1971,7 +1980,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lo
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1982,7 +1991,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lo
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1994,7 +2003,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lo
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2006,7 +2015,7 @@ sycl::event syrk_batch(backend_selector<backend::mklcpu> selector, uplo upper_lo
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2016,8 +2025,8 @@ sycl::event her2(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2025,8 +2034,8 @@ sycl::event her2(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2036,8 +2045,8 @@ sycl::event hbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2047,56 +2056,56 @@ sycl::event hbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<float>* x,
                 std::int64_t incx, std::complex<float>* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<double>* x,
                 std::int64_t incx, std::complex<double>* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklcpu> selector, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklcpu> selector, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2104,8 +2113,8 @@ sycl::event axpy(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2113,8 +2122,8 @@ sycl::event axpy(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2122,7 +2131,7 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2131,7 +2140,7 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2141,7 +2150,7 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2151,7 +2160,7 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2160,9 +2169,9 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2170,9 +2179,9 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2181,9 +2190,9 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2192,25 +2201,25 @@ sycl::event axpy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::mklcpu> selector, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2218,8 +2227,8 @@ sycl::event axpby(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2227,8 +2236,8 @@ sycl::event axpby(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2236,8 +2245,8 @@ sycl::event gerc(backend_selector<backend::mklcpu> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2245,8 +2254,8 @@ sycl::event gerc(backend_selector<backend::mklcpu> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2255,8 +2264,8 @@ sycl::event syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2265,8 +2274,8 @@ sycl::event syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2276,8 +2285,8 @@ sycl::event syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2287,8 +2296,8 @@ sycl::event syr2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2296,8 +2305,8 @@ sycl::event gemv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2305,8 +2314,8 @@ sycl::event gemv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2315,8 +2324,8 @@ sycl::event gemv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2325,8 +2334,8 @@ sycl::event gemv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2336,7 +2345,7 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2348,7 +2357,7 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2360,7 +2369,7 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2372,7 +2381,7 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2383,9 +2392,9 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2394,9 +2403,9 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2406,9 +2415,9 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2419,9 +2428,9 @@ sycl::event gemv_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2430,7 +2439,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        const float* x, std::int64_t incx, std::int64_t stridex, float* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2441,7 +2450,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        const double* x, std::int64_t incx, std::int64_t stridex, double* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2453,7 +2462,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2465,7 +2474,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t stridex, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2476,9 +2485,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2487,9 +2496,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2498,9 +2507,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2509,9 +2518,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2519,8 +2528,8 @@ sycl::event her(backend_selector<backend::mklcpu> selector, uplo upper_lower, st
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -2528,56 +2537,56 @@ sycl::event her(backend_selector<backend::mklcpu> selector, uplo upper_lower, st
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
@@ -2587,7 +2596,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2599,7 +2608,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2611,7 +2620,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2624,7 +2633,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2636,7 +2645,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2648,7 +2657,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2660,7 +2669,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2672,7 +2681,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2684,7 +2693,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2696,7 +2705,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2709,7 +2718,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2722,7 +2731,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2735,7 +2744,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2747,7 +2756,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2759,7 +2768,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2771,7 +2780,7 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2780,48 +2789,48 @@ sycl::event gemm_batch(backend_selector<backend::mklcpu> selector, transpose tra
 sycl::event spmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event spmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklcpu> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklcpu> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklcpu> selector, std::int64_t n, std::complex<float>* x,
                  std::int64_t incx, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
@@ -2829,8 +2838,8 @@ sycl::event geru(backend_selector<backend::mklcpu> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2838,38 +2847,38 @@ sycl::event geru(backend_selector<backend::mklcpu> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
@@ -2878,8 +2887,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2888,8 +2897,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2899,8 +2908,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2910,8 +2919,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2921,8 +2930,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2931,8 +2940,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2941,8 +2950,8 @@ sycl::event gemm(backend_selector<backend::mklcpu> selector, transpose transa, t
                  std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2952,7 +2961,7 @@ sycl::event gemm_bias(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2964,7 +2973,7 @@ sycl::event gemm_bias(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2976,7 +2985,7 @@ sycl::event gemm_bias(backend_selector<backend::mklcpu> selector, transpose tran
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2988,7 +2997,7 @@ sycl::event gemm_bias(backend_selector<backend::mklcpu> selector, transpose tran
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2998,7 +3007,7 @@ sycl::event herk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::herk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3007,7 +3016,7 @@ sycl::event herk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::herk(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3015,8 +3024,8 @@ sycl::event herk(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
 sycl::event ger(backend_selector<backend::mklcpu> selector, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3024,8 +3033,8 @@ sycl::event ger(backend_selector<backend::mklcpu> selector, std::int64_t m, std:
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3033,9 +3042,9 @@ sycl::event trsm(backend_selector<backend::mklcpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3043,9 +3052,9 @@ sycl::event trsm(backend_selector<backend::mklcpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3054,9 +3063,9 @@ sycl::event trsm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3065,9 +3074,9 @@ sycl::event trsm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3076,7 +3085,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3087,7 +3096,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3099,7 +3108,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3111,7 +3120,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3122,7 +3131,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3133,7 +3142,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3145,7 +3154,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3157,7 +3166,7 @@ sycl::event trsm_batch(backend_selector<backend::mklcpu> selector, side* left_ri
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3167,8 +3176,8 @@ sycl::event dotu(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3176,8 +3185,8 @@ sycl::event dotu(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3187,8 +3196,8 @@ sycl::event hemm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3198,8 +3207,8 @@ sycl::event hemm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3207,8 +3216,8 @@ sycl::event hpr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3216,8 +3225,8 @@ sycl::event hpr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3226,8 +3235,8 @@ sycl::event gbmv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3236,8 +3245,8 @@ sycl::event gbmv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3247,8 +3256,8 @@ sycl::event gbmv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3258,15 +3267,15 @@ sycl::event gbmv(backend_selector<backend::mklcpu> selector, transpose trans, st
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3274,7 +3283,7 @@ sycl::event tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
 sycl::event tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3283,7 +3292,7 @@ sycl::event tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3292,7 +3301,7 @@ sycl::event tbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3302,8 +3311,8 @@ sycl::event symm(backend_selector<backend::mklcpu> selector, side left_right, up
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3312,8 +3321,8 @@ sycl::event symm(backend_selector<backend::mklcpu> selector, side left_right, up
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3323,8 +3332,8 @@ sycl::event symm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3334,8 +3343,8 @@ sycl::event symm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3343,8 +3352,8 @@ sycl::event dotc(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3352,24 +3361,24 @@ sycl::event dotc(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -3377,9 +3386,9 @@ sycl::event trmm(backend_selector<backend::mklcpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3387,9 +3396,9 @@ sycl::event trmm(backend_selector<backend::mklcpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3398,9 +3407,9 @@ sycl::event trmm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3409,39 +3418,39 @@ sycl::event trmm(backend_selector<backend::mklcpu> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::mklcpu> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::mklcpu> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3449,8 +3458,8 @@ sycl::event tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3458,24 +3467,24 @@ sycl::event tpsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3483,8 +3492,8 @@ sycl::event trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3492,47 +3501,47 @@ sycl::event trsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t* n, const float** x,
                        std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3541,7 +3550,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3550,7 +3559,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3559,7 +3568,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t*
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3568,7 +3577,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3577,7 +3586,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3586,7 +3595,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3595,7 +3604,7 @@ sycl::event copy_batch(backend_selector<backend::mklcpu> selector, std::int64_t
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3605,7 +3614,7 @@ sycl::event hemv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3615,7 +3624,7 @@ sycl::event hemv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3624,9 +3633,9 @@ sycl::event gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a,
                   std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3634,9 +3643,9 @@ sycl::event gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a,
                   std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3645,9 +3654,9 @@ sycl::event gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3656,9 +3665,9 @@ sycl::event gemmt(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3667,8 +3676,8 @@ sycl::event sbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3677,45 +3686,45 @@ sycl::event sbmv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklcpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3723,7 +3732,7 @@ sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
 sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3732,7 +3741,7 @@ sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3741,7 +3750,7 @@ sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3749,78 +3758,78 @@ sycl::event tbsv(backend_selector<backend::mklcpu> selector, uplo upper_lower, t
 sycl::event spr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::mklcpu> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklcpu> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::mklcpu> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::mklcpu> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::mklcpu> selector, float* a, float* b, float* c, float* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::mklcpu> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3828,7 +3837,7 @@ sycl::event rotg(backend_selector<backend::mklcpu> selector, std::complex<float>
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3836,15 +3845,15 @@ sycl::event rotg(backend_selector<backend::mklcpu> selector, std::complex<double
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::mklcpu> selector, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                         incy, result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                          incy, result, dependencies);
     return done;
 }
 
@@ -3854,8 +3863,8 @@ sycl::event her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3865,32 +3874,32 @@ sycl::event her2k(backend_selector<backend::mklcpu> selector, uplo upper_lower,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklcpu> selector, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklcpu> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
@@ -3898,7 +3907,7 @@ sycl::event symv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::symv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3907,7 +3916,7 @@ sycl::event symv(backend_selector<backend::mklcpu> selector, uplo upper_lower, s
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::symv(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3917,7 +3926,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3928,7 +3937,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3939,7 +3948,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3950,7 +3959,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3960,7 +3969,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3969,7 +3978,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3979,7 +3988,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3989,7 +3998,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -4000,7 +4009,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklcpu> selector, transpose
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4012,7 +4021,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklcpu> selector, transpose
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4025,7 +4034,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklcpu> selector, transpose
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4038,7 +4047,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklcpu> selector, transpose
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4047,16 +4056,16 @@ sycl::event omatadd_batch(backend_selector<backend::mklcpu> selector, transpose
 sycl::event omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4064,8 +4073,8 @@ sycl::event omatcopy(backend_selector<backend::mklcpu> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4073,8 +4082,8 @@ sycl::event omatcopy(backend_selector<backend::mklcpu> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4082,7 +4091,7 @@ sycl::event omatcopy2(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4091,7 +4100,7 @@ sycl::event omatcopy2(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4101,7 +4110,7 @@ sycl::event omatcopy2(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4111,7 +4120,7 @@ sycl::event omatcopy2(backend_selector<backend::mklcpu> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4119,16 +4128,16 @@ sycl::event omatcopy2(backend_selector<backend::mklcpu> selector, transpose tran
 sycl::event imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::mklcpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4136,8 +4145,8 @@ sycl::event imatcopy(backend_selector<backend::mklcpu> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4145,8 +4154,8 @@ sycl::event imatcopy(backend_selector<backend::mklcpu> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4154,9 +4163,9 @@ sycl::event omatadd(backend_selector<backend::mklcpu> selector, transpose transa
                     std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                     float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4164,9 +4173,9 @@ sycl::event omatadd(backend_selector<backend::mklcpu> selector, transpose transa
                     std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                     double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4175,9 +4184,9 @@ sycl::event omatadd(backend_selector<backend::mklcpu> selector, transpose transa
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4186,9 +4195,9 @@ sycl::event omatadd(backend_selector<backend::mklcpu> selector, transpose transa
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4197,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4208,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4219,9 +4228,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4230,9 +4239,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4240,9 +4249,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4250,9 +4259,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4261,9 +4270,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4272,8 +4281,8 @@ sycl::event imatcopy_batch(backend_selector<backend::mklcpu> selector, transpose
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
diff --git a/include/oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp b/include/oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp
similarity index 70%
rename from include/oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp
rename to include/oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp
index db9e3f4d0..bb82eaa81 100644
--- a/include/oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp
+++ b/include/oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_MKLCPU_HPP_
-#define _ONEMKL_BLAS_MKLCPU_HPP_
+#ifndef _ONEMATH_BLAS_MKLCPU_HPP_
+#define _ONEMATH_BLAS_MKLCPU_HPP_
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
@@ -26,31 +26,31 @@
 #endif
 #include <complex>
 #include <cstdint>
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
-using oneapi::mkl::transpose;
-using oneapi::mkl::uplo;
-using oneapi::mkl::side;
-using oneapi::mkl::diag;
-using oneapi::mkl::offset;
+namespace math {
+using oneapi::math::transpose;
+using oneapi::math::uplo;
+using oneapi::math::side;
+using oneapi::math::diag;
+using oneapi::math::offset;
 namespace blas {
 namespace mklcpu {
 namespace column_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } // namespace column_major
 namespace row_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_BLAS_MKLCPU_HPP_
+#endif //_ONEMATH_BLAS_MKLCPU_HPP_
diff --git a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hpp
similarity index 85%
rename from include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp
rename to include/oneapi/math/blas/detail/mklgpu/blas_ct.hpp
index 10ceb3b73..dac4fba96 100644
--- a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hpp
@@ -28,14 +28,14 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backends.hpp"
 
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -52,7 +52,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_MKLGPU_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx
similarity index 72%
rename from include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx
rename to include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx
index bfad24ca2..7ab1f887c 100644
--- a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx
@@ -22,112 +22,112 @@
 void herk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void spr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void spr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -136,9 +136,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -147,9 +147,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -158,9 +158,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -169,9 +169,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -180,9 +180,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -191,9 +191,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -202,9 +202,9 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -213,57 +213,57 @@ void gemm_batch(backend_selector<backend::mklgpu> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -271,9 +271,9 @@ void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -281,181 +281,186 @@ void syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void her2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void her2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::mklgpu> selector, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklgpu> selector, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void sdsdot(backend_selector<backend::mklgpu> selector, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                              result);
 }
 
 void gerc(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void gerc(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -463,8 +468,8 @@ void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -472,40 +477,40 @@ void syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -513,9 +518,9 @@ void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -523,9 +528,9 @@ void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -534,9 +539,9 @@ void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -545,9 +550,9 @@ void gemv_batch(backend_selector<backend::mklgpu> selector, transpose trans, std
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std::int64_t m,
@@ -555,9 +560,9 @@ void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std::int64_t m,
@@ -565,9 +570,9 @@ void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std::int64_t m,
@@ -575,9 +580,9 @@ void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std::int64_t m,
@@ -585,87 +590,87 @@ void dgmm_batch(backend_selector<backend::mklgpu> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void her(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void her(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void hpr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void hpr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void iamin(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklgpu> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::mklgpu> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -673,8 +678,9 @@ void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -682,8 +688,9 @@ void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -691,8 +698,9 @@ void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -700,84 +708,85 @@ void gemm_bias(backend_selector<backend::mklgpu> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void swap(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void geru(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -785,8 +794,8 @@ void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpos
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -794,106 +803,106 @@ void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpos
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void syr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void ger(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void ger(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void trsm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -901,8 +910,8 @@ void hemm(backend_selector<backend::mklgpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -910,40 +919,40 @@ void hemm(backend_selector<backend::mklgpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void hpr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -951,8 +960,8 @@ void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int6
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -960,52 +969,52 @@ void gbmv(backend_selector<backend::mklgpu> selector, transpose trans, std::int6
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1013,8 +1022,8 @@ void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1022,222 +1031,222 @@ void symm(backend_selector<backend::mklgpu> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void syr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void trmm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::mklgpu> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::mklgpu> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose transa,
@@ -1245,8 +1254,8 @@ void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose transa,
@@ -1254,140 +1263,140 @@ void gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklgpu> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void spr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void iamax(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklgpu> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::mklgpu> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::mklgpu> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1395,9 +1404,9 @@ void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, upl
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1405,9 +1414,9 @@ void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, upl
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1415,9 +1424,9 @@ void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, upl
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, uplo upper_lower,
@@ -1425,9 +1434,9 @@ void trsm_batch(backend_selector<backend::mklgpu> selector, side left_right, upl
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -1435,8 +1444,8 @@ void her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
@@ -1444,60 +1453,60 @@ void her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::mklgpu> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklgpu> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklgpu> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::mklgpu> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -1505,8 +1514,8 @@ void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
@@ -1514,38 +1523,38 @@ void omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1553,9 +1562,9 @@ void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1563,9 +1572,9 @@ void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1574,9 +1583,9 @@ void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1586,113 +1595,113 @@ void omatadd_batch(backend_selector<backend::mklgpu> selector, transpose transa,
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy2(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1700,8 +1709,8 @@ void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, trans
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, transpose transb,
@@ -1709,8 +1718,8 @@ void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, trans
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1718,8 +1727,8 @@ void omatadd(backend_selector<backend::mklgpu> selector, transpose transa, trans
 sycl::event syr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1727,70 +1736,70 @@ sycl::event syr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1798,8 +1807,8 @@ sycl::event trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1807,24 +1816,24 @@ sycl::event trmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1832,8 +1841,8 @@ sycl::event tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1841,24 +1850,24 @@ sycl::event tpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
@@ -1867,8 +1876,8 @@ sycl::event hpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1877,8 +1886,8 @@ sycl::event hpmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1886,7 +1895,7 @@ sycl::event syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1895,7 +1904,7 @@ sycl::event syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1905,7 +1914,7 @@ sycl::event syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1915,7 +1924,7 @@ sycl::event syrk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1925,7 +1934,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo* upper_l
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1936,7 +1945,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo* upper_l
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1948,7 +1957,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo* upper_l
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1960,7 +1969,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo* upper_l
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1971,7 +1980,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lo
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1982,7 +1991,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lo
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1994,7 +2003,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lo
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2006,7 +2015,7 @@ sycl::event syrk_batch(backend_selector<backend::mklgpu> selector, uplo upper_lo
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2016,8 +2025,8 @@ sycl::event her2(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2025,8 +2034,8 @@ sycl::event her2(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2036,8 +2045,8 @@ sycl::event hbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2047,56 +2056,56 @@ sycl::event hbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<float>* x,
                 std::int64_t incx, std::complex<float>* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<double>* x,
                 std::int64_t incx, std::complex<double>* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklgpu> selector, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::mklgpu> selector, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2104,8 +2113,8 @@ sycl::event axpy(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2113,8 +2122,8 @@ sycl::event axpy(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2122,7 +2131,7 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2131,7 +2140,7 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2141,7 +2150,7 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2151,7 +2160,7 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2160,9 +2169,9 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2170,9 +2179,9 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2181,9 +2190,9 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2192,25 +2201,25 @@ sycl::event axpy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::mklgpu> selector, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2218,8 +2227,8 @@ sycl::event axpby(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2227,8 +2236,8 @@ sycl::event axpby(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2236,8 +2245,8 @@ sycl::event gerc(backend_selector<backend::mklgpu> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2245,8 +2254,8 @@ sycl::event gerc(backend_selector<backend::mklgpu> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2255,8 +2264,8 @@ sycl::event syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2265,8 +2274,8 @@ sycl::event syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2276,8 +2285,8 @@ sycl::event syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2287,8 +2296,8 @@ sycl::event syr2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2296,8 +2305,8 @@ sycl::event gemv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2305,8 +2314,8 @@ sycl::event gemv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2315,8 +2324,8 @@ sycl::event gemv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2325,8 +2334,8 @@ sycl::event gemv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2336,7 +2345,7 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2348,7 +2357,7 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2360,7 +2369,7 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2372,7 +2381,7 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2383,9 +2392,9 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2394,9 +2403,9 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2406,9 +2415,9 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2419,9 +2428,9 @@ sycl::event gemv_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2430,7 +2439,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        const float* x, std::int64_t incx, std::int64_t stridex, float* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2441,7 +2450,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        const double* x, std::int64_t incx, std::int64_t stridex, double* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2453,7 +2462,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2465,7 +2474,7 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t stridex, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2476,9 +2485,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2487,9 +2496,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2498,9 +2507,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2509,9 +2518,9 @@ sycl::event dgmm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2519,8 +2528,8 @@ sycl::event her(backend_selector<backend::mklgpu> selector, uplo upper_lower, st
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -2528,56 +2537,56 @@ sycl::event her(backend_selector<backend::mklgpu> selector, uplo upper_lower, st
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
@@ -2587,7 +2596,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2599,7 +2608,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2611,7 +2620,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2623,7 +2632,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2635,7 +2644,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2647,7 +2656,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2659,7 +2668,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2672,7 +2681,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2685,7 +2694,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2697,7 +2706,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2709,7 +2718,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2721,7 +2730,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2733,7 +2742,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2745,7 +2754,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2758,7 +2767,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2771,7 +2780,7 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
                        std::int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2780,48 +2789,48 @@ sycl::event gemm_batch(backend_selector<backend::mklgpu> selector, transpose tra
 sycl::event spmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event spmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklgpu> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklgpu> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklgpu> selector, std::int64_t n, std::complex<float>* x,
                  std::int64_t incx, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
@@ -2829,8 +2838,8 @@ sycl::event geru(backend_selector<backend::mklgpu> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2838,38 +2847,38 @@ sycl::event geru(backend_selector<backend::mklgpu> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
@@ -2878,8 +2887,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2888,8 +2897,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2899,8 +2908,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2910,8 +2919,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2921,8 +2930,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2931,8 +2940,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2941,8 +2950,8 @@ sycl::event gemm(backend_selector<backend::mklgpu> selector, transpose transa, t
                  std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2952,7 +2961,7 @@ sycl::event gemm_bias(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2964,7 +2973,7 @@ sycl::event gemm_bias(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2976,7 +2985,7 @@ sycl::event gemm_bias(backend_selector<backend::mklgpu> selector, transpose tran
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2988,7 +2997,7 @@ sycl::event gemm_bias(backend_selector<backend::mklgpu> selector, transpose tran
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2998,7 +3007,7 @@ sycl::event herk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::herk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3007,7 +3016,7 @@ sycl::event herk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::herk(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3015,8 +3024,8 @@ sycl::event herk(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
 sycl::event ger(backend_selector<backend::mklgpu> selector, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3024,8 +3033,8 @@ sycl::event ger(backend_selector<backend::mklgpu> selector, std::int64_t m, std:
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3033,9 +3042,9 @@ sycl::event trsm(backend_selector<backend::mklgpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3043,9 +3052,9 @@ sycl::event trsm(backend_selector<backend::mklgpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3054,9 +3063,9 @@ sycl::event trsm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3065,9 +3074,9 @@ sycl::event trsm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3076,7 +3085,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3087,7 +3096,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3099,7 +3108,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3111,7 +3120,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3122,7 +3131,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3133,7 +3142,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3145,7 +3154,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3157,7 +3166,7 @@ sycl::event trsm_batch(backend_selector<backend::mklgpu> selector, side* left_ri
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3167,8 +3176,8 @@ sycl::event dotu(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3176,8 +3185,8 @@ sycl::event dotu(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3187,8 +3196,8 @@ sycl::event hemm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3198,8 +3207,8 @@ sycl::event hemm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3207,8 +3216,8 @@ sycl::event hpr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3216,8 +3225,8 @@ sycl::event hpr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3226,8 +3235,8 @@ sycl::event gbmv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3236,8 +3245,8 @@ sycl::event gbmv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3247,8 +3256,8 @@ sycl::event gbmv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3258,15 +3267,15 @@ sycl::event gbmv(backend_selector<backend::mklgpu> selector, transpose trans, st
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3274,7 +3283,7 @@ sycl::event tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
 sycl::event tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3283,7 +3292,7 @@ sycl::event tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3292,7 +3301,7 @@ sycl::event tbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3302,8 +3311,8 @@ sycl::event symm(backend_selector<backend::mklgpu> selector, side left_right, up
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3312,8 +3321,8 @@ sycl::event symm(backend_selector<backend::mklgpu> selector, side left_right, up
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3323,8 +3332,8 @@ sycl::event symm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3334,8 +3343,8 @@ sycl::event symm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3343,8 +3352,8 @@ sycl::event dotc(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3352,24 +3361,24 @@ sycl::event dotc(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -3377,9 +3386,9 @@ sycl::event trmm(backend_selector<backend::mklgpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3387,9 +3396,9 @@ sycl::event trmm(backend_selector<backend::mklgpu> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3398,9 +3407,9 @@ sycl::event trmm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3409,39 +3418,39 @@ sycl::event trmm(backend_selector<backend::mklgpu> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::mklgpu> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::mklgpu> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3449,8 +3458,8 @@ sycl::event tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3458,24 +3467,24 @@ sycl::event tpsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3483,8 +3492,8 @@ sycl::event trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3492,47 +3501,47 @@ sycl::event trsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t* n, const float** x,
                        std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3541,7 +3550,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3550,7 +3559,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3559,7 +3568,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t*
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3568,7 +3577,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3577,7 +3586,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3586,7 +3595,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3595,7 +3604,7 @@ sycl::event copy_batch(backend_selector<backend::mklgpu> selector, std::int64_t
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3605,7 +3614,7 @@ sycl::event hemv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3615,7 +3624,7 @@ sycl::event hemv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3624,9 +3633,9 @@ sycl::event gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a,
                   std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3634,9 +3643,9 @@ sycl::event gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a,
                   std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3645,9 +3654,9 @@ sycl::event gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3656,9 +3665,9 @@ sycl::event gemmt(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3667,8 +3676,8 @@ sycl::event sbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3677,45 +3686,45 @@ sycl::event sbmv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklgpu> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3723,7 +3732,7 @@ sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
 sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3732,7 +3741,7 @@ sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3741,7 +3750,7 @@ sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3749,78 +3758,78 @@ sycl::event tbsv(backend_selector<backend::mklgpu> selector, uplo upper_lower, t
 sycl::event spr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::mklgpu> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::mklgpu> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::mklgpu> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::mklgpu> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::mklgpu> selector, float* a, float* b, float* c, float* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::mklgpu> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3828,7 +3837,7 @@ sycl::event rotg(backend_selector<backend::mklgpu> selector, std::complex<float>
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3836,15 +3845,15 @@ sycl::event rotg(backend_selector<backend::mklgpu> selector, std::complex<double
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::mklgpu> selector, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                         incy, result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                          incy, result, dependencies);
     return done;
 }
 
@@ -3854,8 +3863,8 @@ sycl::event her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3865,32 +3874,32 @@ sycl::event her2k(backend_selector<backend::mklgpu> selector, uplo upper_lower,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklgpu> selector, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::mklgpu> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
@@ -3898,7 +3907,7 @@ sycl::event symv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::symv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3907,7 +3916,7 @@ sycl::event symv(backend_selector<backend::mklgpu> selector, uplo upper_lower, s
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::symv(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3917,7 +3926,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3928,7 +3937,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3939,7 +3948,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3950,7 +3959,7 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3960,7 +3969,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3969,7 +3978,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3979,7 +3988,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3989,7 +3998,7 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -4000,7 +4009,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklgpu> selector, transpose
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4012,7 +4021,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklgpu> selector, transpose
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4025,7 +4034,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklgpu> selector, transpose
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4038,7 +4047,7 @@ sycl::event omatadd_batch(backend_selector<backend::mklgpu> selector, transpose
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4047,16 +4056,16 @@ sycl::event omatadd_batch(backend_selector<backend::mklgpu> selector, transpose
 sycl::event omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4064,8 +4073,8 @@ sycl::event omatcopy(backend_selector<backend::mklgpu> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4073,8 +4082,8 @@ sycl::event omatcopy(backend_selector<backend::mklgpu> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4082,7 +4091,7 @@ sycl::event omatcopy2(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4091,7 +4100,7 @@ sycl::event omatcopy2(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4101,7 +4110,7 @@ sycl::event omatcopy2(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4111,7 +4120,7 @@ sycl::event omatcopy2(backend_selector<backend::mklgpu> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4119,16 +4128,16 @@ sycl::event omatcopy2(backend_selector<backend::mklgpu> selector, transpose tran
 sycl::event imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::mklgpu> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4136,8 +4145,8 @@ sycl::event imatcopy(backend_selector<backend::mklgpu> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4145,8 +4154,8 @@ sycl::event imatcopy(backend_selector<backend::mklgpu> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4154,9 +4163,9 @@ sycl::event omatadd(backend_selector<backend::mklgpu> selector, transpose transa
                     std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                     float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4164,9 +4173,9 @@ sycl::event omatadd(backend_selector<backend::mklgpu> selector, transpose transa
                     std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                     double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4175,9 +4184,9 @@ sycl::event omatadd(backend_selector<backend::mklgpu> selector, transpose transa
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4186,9 +4195,9 @@ sycl::event omatadd(backend_selector<backend::mklgpu> selector, transpose transa
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4197,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4208,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4219,9 +4228,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4230,9 +4239,9 @@ sycl::event omatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4240,9 +4249,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4250,9 +4259,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4261,9 +4270,9 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4272,8 +4281,8 @@ sycl::event imatcopy_batch(backend_selector<backend::mklgpu> selector, transpose
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
diff --git a/include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp b/include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp
similarity index 77%
rename from include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp
rename to include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp
index cf5a2a398..636e89d4b 100644
--- a/include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp
+++ b/include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_MKLGPU_HPP_
-#define _ONEMKL_BLAS_MKLGPU_HPP_
+#ifndef _ONEMATH_BLAS_MKLGPU_HPP_
+#define _ONEMATH_BLAS_MKLGPU_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -28,27 +28,27 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace column_major
 namespace row_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace row_major
 } //namespace mklgpu
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BLAS_MKLGPU_HPP_
+#endif //_ONEMATH_BLAS_MKLGPU_HPP_
diff --git a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hpp b/include/oneapi/math/blas/detail/netlib/blas_ct.hpp
similarity index 85%
rename from include/oneapi/mkl/blas/detail/netlib/blas_ct.hpp
rename to include/oneapi/math/blas/detail/netlib/blas_ct.hpp
index cdfc79e7f..5e4b4d727 100644
--- a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/netlib/blas_ct.hpp
@@ -28,13 +28,13 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -51,7 +51,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_NETLIB_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx b/include/oneapi/math/blas/detail/netlib/blas_ct.hxx
similarity index 72%
rename from include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx
rename to include/oneapi/math/blas/detail/netlib/blas_ct.hxx
index 4c94213fb..c1ff9b629 100644
--- a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/netlib/blas_ct.hxx
@@ -22,112 +22,112 @@
 void herk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void spr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void spr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -136,9 +136,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -147,9 +147,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -158,9 +158,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -169,9 +169,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -180,9 +180,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -191,9 +191,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -202,9 +202,9 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -213,57 +213,57 @@ void gemm_batch(backend_selector<backend::netlib> selector, transpose transa, tr
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, stride_a, b, ldb, stride_b, beta, c,
-                                                 ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                  c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a,
-                                           lda, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                            a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -271,9 +271,9 @@ void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -281,181 +281,186 @@ void syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lower, tr
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                  batch_size);
 }
 
 void her2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void her2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::netlib> selector, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::netlib> selector, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                 y, incy, stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                  y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void axpby(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                             incy);
 }
 
 void sdsdot(backend_selector<backend::netlib> selector, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                              result);
 }
 
 void gerc(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void gerc(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -463,8 +468,8 @@ void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -472,40 +477,40 @@ void syr2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -513,9 +518,9 @@ void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -523,9 +528,9 @@ void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -534,9 +539,9 @@ void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -545,9 +550,9 @@ void gemv_batch(backend_selector<backend::netlib> selector, transpose trans, std
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std::int64_t m,
@@ -555,9 +560,9 @@ void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std::int64_t m,
@@ -565,9 +570,9 @@ void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std::int64_t m,
@@ -575,9 +580,9 @@ void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std::int64_t m,
@@ -585,87 +590,87 @@ void dgmm_batch(backend_selector<backend::netlib> selector, side left_right, std
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                 stridea, x, incx, stridex, c, ldc, stridec,
-                                                 batch_size);
+    oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                  stridea, x, incx, stridex, c, ldc, stridec,
+                                                  batch_size);
 }
 
 void her(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void her(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void hpr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void hpr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
 }
 
 void iamin(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::netlib> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::netlib> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void spmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                           beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
+                                            beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -673,8 +678,9 @@ void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -682,8 +688,9 @@ void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, tra
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -691,8 +698,9 @@ void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -700,84 +708,85 @@ void gemm_bias(backend_selector<backend::netlib> selector, transpose transa, tra
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n,
-                                                k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
+    oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                 co);
 }
 
 void swap(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void geru(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void nrm2(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -785,8 +794,8 @@ void gemm(backend_selector<backend::netlib> selector, transpose transa, transpos
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -794,106 +803,106 @@ void gemm(backend_selector<backend::netlib> selector, transpose transa, transpos
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                           lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
+                                            lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void syr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a, lda);
+    oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a, lda);
 }
 
 void ger(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void ger(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                           lda);
 }
 
 void trsm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -901,8 +910,8 @@ void hemm(backend_selector<backend::netlib> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -910,40 +919,40 @@ void hemm(backend_selector<backend::netlib> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void hpr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -951,8 +960,8 @@ void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int6
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -960,52 +969,52 @@ void gbmv(backend_selector<backend::netlib> selector, transpose trans, std::int6
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                            lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1013,8 +1022,8 @@ void symm(backend_selector<backend::netlib> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1022,222 +1031,222 @@ void symm(backend_selector<backend::netlib> selector, side left_right, uplo uppe
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                           alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                            alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void syr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                          lda);
+    oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                           lda);
 }
 
 void trmm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                           unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                            unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::netlib> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::netlib> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, x, incx);
 }
 
 void trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::netlib> selector, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy,
-                                                 stridey, batch_size);
+    oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                  incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpose transa,
@@ -1245,8 +1254,8 @@ void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpose transa,
@@ -1254,140 +1263,140 @@ void gemmt(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::netlib> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                           x, incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                            x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                           k, a, lda, x, incx);
+    oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                            k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void spr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                           incy, a);
+    oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
+                                            incy, a);
 }
 
 void iamax(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::netlib> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::netlib> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::netlib> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1395,9 +1404,9 @@ void trsm_batch(backend_selector<backend::netlib> selector, side left_right, upl
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1405,9 +1414,9 @@ void trsm_batch(backend_selector<backend::netlib> selector, side left_right, upl
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1415,9 +1424,9 @@ void trsm_batch(backend_selector<backend::netlib> selector, side left_right, upl
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::netlib> selector, side left_right, uplo upper_lower,
@@ -1425,9 +1434,9 @@ void trsm_batch(backend_selector<backend::netlib> selector, side left_right, upl
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                 trans, unit_diag, m, n, alpha, a, lda, stride_a, b,
-                                                 ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                  b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -1435,8 +1444,8 @@ void her2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
@@ -1444,60 +1453,60 @@ void her2k(backend_selector<backend::netlib> selector, uplo upper_lower, transpo
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::netlib> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::netlib> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::netlib> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::netlib> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                           incx, beta, y, incy);
+    oneapi::math::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                            incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -1505,8 +1514,8 @@ void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
@@ -1514,38 +1523,38 @@ void omatcopy_batch(backend_selector<backend::netlib> selector, transpose trans,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                     lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                      lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                     lda, ldb, stride, batch_size);
+    oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                      lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1553,9 +1562,9 @@ void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1563,9 +1572,9 @@ void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa,
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1574,9 +1583,9 @@ void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1586,113 +1595,113 @@ void omatadd_batch(backend_selector<backend::netlib> selector, transpose transa,
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                    alpha, a, lda, stride_a, beta, b, ldb, stride_b,
-                                                    c, ldc, stride_c, batch_size);
+    oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                     alpha, a, lda, stride_a, beta, b, ldb,
+                                                     stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
+                                                ldb);
 }
 
 void omatcopy2(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                stridea, b, ldb, strideb);
+    oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                               ldb);
+    oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                ldb);
 }
 
 void omatadd(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1700,8 +1709,8 @@ void omatadd(backend_selector<backend::netlib> selector, transpose transa, trans
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::netlib> selector, transpose transa, transpose transb,
@@ -1709,8 +1718,8 @@ void omatadd(backend_selector<backend::netlib> selector, transpose transa, trans
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                              lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
+                                               lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1718,8 +1727,8 @@ void omatadd(backend_selector<backend::netlib> selector, transpose transa, trans
 sycl::event syr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1727,70 +1736,70 @@ sycl::event syr2(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n, float alpha, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                        dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1798,8 +1807,8 @@ sycl::event trmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -1807,24 +1816,24 @@ sycl::event trmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1832,8 +1841,8 @@ sycl::event tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1841,24 +1850,24 @@ sycl::event tpmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
@@ -1867,8 +1876,8 @@ sycl::event hpmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1877,8 +1886,8 @@ sycl::event hpmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1886,7 +1895,7 @@ sycl::event syrk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1895,7 +1904,7 @@ sycl::event syrk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1905,7 +1914,7 @@ sycl::event syrk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1915,7 +1924,7 @@ sycl::event syrk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1925,7 +1934,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo* upper_l
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1936,7 +1945,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo* upper_l
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1948,7 +1957,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo* upper_l
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1960,7 +1969,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo* upper_l
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1971,7 +1980,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lo
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1982,7 +1991,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lo
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1994,7 +2003,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lo
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2006,7 +2015,7 @@ sycl::event syrk_batch(backend_selector<backend::netlib> selector, uplo upper_lo
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2016,8 +2025,8 @@ sycl::event her2(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2025,8 +2034,8 @@ sycl::event her2(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2036,8 +2045,8 @@ sycl::event hbmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2047,56 +2056,56 @@ sycl::event hbmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<float>* x,
                 std::int64_t incx, std::complex<float>* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<double>* x,
                 std::int64_t incx, std::complex<double>* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::netlib> selector, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::netlib> selector, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                      s, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
+                                                       s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2104,8 +2113,8 @@ sycl::event axpy(backend_selector<backend::netlib> selector, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2113,8 +2122,8 @@ sycl::event axpy(backend_selector<backend::netlib> selector, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                       incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                        incy, dependencies);
     return done;
 }
 
@@ -2122,7 +2131,7 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2131,7 +2140,7 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2141,7 +2150,7 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2151,7 +2160,7 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2160,9 +2169,9 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2170,9 +2179,9 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2181,9 +2190,9 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
@@ -2192,25 +2201,25 @@ sycl::event axpy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                             incx, stridex, y, incy, stridey,
-                                                             batch_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                              incx, stridex, y, incy, stridey,
+                                                              batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::netlib> selector, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::netlib> selector, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2218,8 +2227,8 @@ sycl::event axpby(backend_selector<backend::netlib> selector, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2227,8 +2236,8 @@ sycl::event axpby(backend_selector<backend::netlib> selector, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                        beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                         beta, y, incy, dependencies);
     return done;
 }
 
@@ -2236,8 +2245,8 @@ sycl::event gerc(backend_selector<backend::netlib> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2245,8 +2254,8 @@ sycl::event gerc(backend_selector<backend::netlib> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2255,8 +2264,8 @@ sycl::event syr2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2265,8 +2274,8 @@ sycl::event syr2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2276,8 +2285,8 @@ sycl::event syr2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2287,8 +2296,8 @@ sycl::event syr2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2296,8 +2305,8 @@ sycl::event gemv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2305,8 +2314,8 @@ sycl::event gemv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2315,8 +2324,8 @@ sycl::event gemv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2325,8 +2334,8 @@ sycl::event gemv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
+                                                        lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2336,7 +2345,7 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2348,7 +2357,7 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2360,7 +2369,7 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2372,7 +2381,7 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2383,9 +2392,9 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2394,9 +2403,9 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2406,9 +2415,9 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose* tr
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2419,9 +2428,9 @@ sycl::event gemv_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, x, incx, beta, y, incy,
-                                                             group_count, group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
+        group_size, dependencies);
     return done;
 }
 
@@ -2430,7 +2439,7 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side left_rig
                        const float* x, std::int64_t incx, std::int64_t stridex, float* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2441,7 +2450,7 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side left_rig
                        const double* x, std::int64_t incx, std::int64_t stridex, double* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2453,7 +2462,7 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2465,7 +2474,7 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t stridex, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2476,9 +2485,9 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2487,9 +2496,9 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2498,9 +2507,9 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2509,9 +2518,9 @@ sycl::event dgmm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n,
-                                                             a, lda, x, incx, c, ldc, group_count,
-                                                             group_size, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(
+        selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
+        dependencies);
     return done;
 }
 
@@ -2519,8 +2528,8 @@ sycl::event her(backend_selector<backend::netlib> selector, uplo upper_lower, st
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -2528,56 +2537,56 @@ sycl::event her(backend_selector<backend::netlib> selector, uplo upper_lower, st
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::netlib> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::netlib> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
@@ -2587,7 +2596,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2599,7 +2608,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2611,7 +2620,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2624,7 +2633,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2636,7 +2645,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2648,7 +2657,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2660,7 +2669,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2672,7 +2681,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose* tr
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2684,7 +2693,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2696,7 +2705,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2709,7 +2718,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2722,7 +2731,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2735,7 +2744,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2747,7 +2756,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2759,7 +2768,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2771,7 +2780,7 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2780,48 +2789,48 @@ sycl::event gemm_batch(backend_selector<backend::netlib> selector, transpose tra
 sycl::event spmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event spmv(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                       a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
+                                                        a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::netlib> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::netlib> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::netlib> selector, std::int64_t n, std::complex<float>* x,
                  std::int64_t incx, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::netlib> selector, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
@@ -2829,8 +2838,8 @@ sycl::event geru(backend_selector<backend::netlib> selector, std::int64_t m, std
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2838,38 +2847,38 @@ sycl::event geru(backend_selector<backend::netlib> selector, std::int64_t m, std
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
@@ -2878,8 +2887,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2888,8 +2897,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2899,8 +2908,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2910,8 +2919,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2921,8 +2930,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2931,8 +2940,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2941,8 +2950,8 @@ sycl::event gemm(backend_selector<backend::netlib> selector, transpose transa, t
                  std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                               a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2952,7 +2961,7 @@ sycl::event gemm_bias(backend_selector<backend::netlib> selector, transpose tran
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2964,7 +2973,7 @@ sycl::event gemm_bias(backend_selector<backend::netlib> selector, transpose tran
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2976,7 +2985,7 @@ sycl::event gemm_bias(backend_selector<backend::netlib> selector, transpose tran
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2988,7 +2997,7 @@ sycl::event gemm_bias(backend_selector<backend::netlib> selector, transpose tran
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2998,7 +3007,7 @@ sycl::event herk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::herk(
+    auto done = oneapi::math::blas::netlib::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3007,7 +3016,7 @@ sycl::event herk(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::herk(
+    auto done = oneapi::math::blas::netlib::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3015,8 +3024,8 @@ sycl::event herk(backend_selector<backend::netlib> selector, uplo upper_lower, t
 sycl::event ger(backend_selector<backend::netlib> selector, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3024,8 +3033,8 @@ sycl::event ger(backend_selector<backend::netlib> selector, std::int64_t m, std:
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y,
-                                                      incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                       y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3033,9 +3042,9 @@ sycl::event trsm(backend_selector<backend::netlib> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3043,9 +3052,9 @@ sycl::event trsm(backend_selector<backend::netlib> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3054,9 +3063,9 @@ sycl::event trsm(backend_selector<backend::netlib> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3065,9 +3074,9 @@ sycl::event trsm(backend_selector<backend::netlib> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3076,7 +3085,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3087,7 +3096,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3099,7 +3108,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3111,7 +3120,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side left_rig
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3122,7 +3131,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3133,7 +3142,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3145,7 +3154,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3157,7 +3166,7 @@ sycl::event trsm_batch(backend_selector<backend::netlib> selector, side* left_ri
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3167,8 +3176,8 @@ sycl::event dotu(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3176,8 +3185,8 @@ sycl::event dotu(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3187,8 +3196,8 @@ sycl::event hemm(backend_selector<backend::netlib> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3198,8 +3207,8 @@ sycl::event hemm(backend_selector<backend::netlib> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3207,8 +3216,8 @@ sycl::event hpr2(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3216,8 +3225,8 @@ sycl::event hpr2(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3226,8 +3235,8 @@ sycl::event gbmv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3236,8 +3245,8 @@ sycl::event gbmv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3247,8 +3256,8 @@ sycl::event gbmv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3258,15 +3267,15 @@ sycl::event gbmv(backend_selector<backend::netlib> selector, transpose trans, st
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3274,7 +3283,7 @@ sycl::event tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
 sycl::event tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3283,7 +3292,7 @@ sycl::event tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3292,7 +3301,7 @@ sycl::event tbmv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3302,8 +3311,8 @@ sycl::event symm(backend_selector<backend::netlib> selector, side left_right, up
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3312,8 +3321,8 @@ sycl::event symm(backend_selector<backend::netlib> selector, side left_right, up
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3323,8 +3332,8 @@ sycl::event symm(backend_selector<backend::netlib> selector, side left_right, up
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3334,8 +3343,8 @@ sycl::event symm(backend_selector<backend::netlib> selector, side left_right, up
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                               alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3343,8 +3352,8 @@ sycl::event dotc(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
@@ -3352,24 +3361,24 @@ sycl::event dotc(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                      x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                       x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -3377,9 +3386,9 @@ sycl::event trmm(backend_selector<backend::netlib> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3387,9 +3396,9 @@ sycl::event trmm(backend_selector<backend::netlib> selector, side left_right, up
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3398,9 +3407,9 @@ sycl::event trmm(backend_selector<backend::netlib> selector, side left_right, up
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3409,39 +3418,39 @@ sycl::event trmm(backend_selector<backend::netlib> selector, side left_right, up
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
-                                                       upper_lower, trans, unit_diag, m, n, alpha,
-                                                       a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right,
+                                                        upper_lower, trans, unit_diag, m, n, alpha,
+                                                        a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::netlib> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::netlib> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3449,8 +3458,8 @@ sycl::event tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3458,24 +3467,24 @@ sycl::event tpsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                        unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3483,8 +3492,8 @@ sycl::event trsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
@@ -3492,47 +3501,47 @@ sycl::event trsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans,
-                                                       unit_diag, n, a, lda, x, incx, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::trsv(
+        selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                        dependencies);
     return done;
 }
 
 sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t* n, const float** x,
                        std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3541,7 +3550,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3550,7 +3559,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3559,7 +3568,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t*
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3568,7 +3577,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3577,7 +3586,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3586,7 +3595,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3595,7 +3604,7 @@ sycl::event copy_batch(backend_selector<backend::netlib> selector, std::int64_t
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3605,7 +3614,7 @@ sycl::event hemv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hemv(
+    auto done = oneapi::math::blas::netlib::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3615,7 +3624,7 @@ sycl::event hemv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::hemv(
+    auto done = oneapi::math::blas::netlib::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3624,9 +3633,9 @@ sycl::event gemmt(backend_selector<backend::netlib> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a,
                   std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3634,9 +3643,9 @@ sycl::event gemmt(backend_selector<backend::netlib> selector, uplo upper_lower,
                   transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a,
                   std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3645,9 +3654,9 @@ sycl::event gemmt(backend_selector<backend::netlib> selector, uplo upper_lower,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3656,9 +3665,9 @@ sycl::event gemmt(backend_selector<backend::netlib> selector, uplo upper_lower,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                        transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                        c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                         c, ldc, dependencies);
     return done;
 }
 
@@ -3667,8 +3676,8 @@ sycl::event sbmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3677,45 +3686,45 @@ sycl::event sbmv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                               lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::netlib> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                       dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                        dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3723,7 +3732,7 @@ sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
 sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3732,7 +3741,7 @@ sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3741,7 +3750,7 @@ sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv(
+    auto done = oneapi::math::blas::netlib::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3749,78 +3758,78 @@ sycl::event tbsv(backend_selector<backend::netlib> selector, uplo upper_lower, t
 sycl::event spr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::netlib> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::netlib> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::netlib> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::netlib> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::netlib> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                       param, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                        param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::netlib> selector, float* a, float* b, float* c, float* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::netlib> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3828,7 +3837,7 @@ sycl::event rotg(backend_selector<backend::netlib> selector, std::complex<float>
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3836,15 +3845,15 @@ sycl::event rotg(backend_selector<backend::netlib> selector, std::complex<double
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::netlib> selector, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                         incy, result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                          incy, result, dependencies);
     return done;
 }
 
@@ -3854,8 +3863,8 @@ sycl::event her2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3865,32 +3874,32 @@ sycl::event her2k(backend_selector<backend::netlib> selector, uplo upper_lower,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::netlib> selector, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::netlib> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                      result, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                       result, dependencies);
     return done;
 }
 
@@ -3898,7 +3907,7 @@ sycl::event symv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::symv(
+    auto done = oneapi::math::blas::netlib::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3907,7 +3916,7 @@ sycl::event symv(backend_selector<backend::netlib> selector, uplo upper_lower, s
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::symv(
+    auto done = oneapi::math::blas::netlib::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3917,7 +3926,7 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3928,7 +3937,7 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3939,7 +3948,7 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3950,7 +3959,7 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3960,7 +3969,7 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3969,7 +3978,7 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3979,7 +3988,7 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3989,7 +3998,7 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -4000,7 +4009,7 @@ sycl::event omatadd_batch(backend_selector<backend::netlib> selector, transpose
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4012,7 +4021,7 @@ sycl::event omatadd_batch(backend_selector<backend::netlib> selector, transpose
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4025,7 +4034,7 @@ sycl::event omatadd_batch(backend_selector<backend::netlib> selector, transpose
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4038,7 +4047,7 @@ sycl::event omatadd_batch(backend_selector<backend::netlib> selector, transpose
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4047,16 +4056,16 @@ sycl::event omatadd_batch(backend_selector<backend::netlib> selector, transpose
 sycl::event omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4064,8 +4073,8 @@ sycl::event omatcopy(backend_selector<backend::netlib> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4073,8 +4082,8 @@ sycl::event omatcopy(backend_selector<backend::netlib> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4082,7 +4091,7 @@ sycl::event omatcopy2(backend_selector<backend::netlib> selector, transpose tran
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4091,7 +4100,7 @@ sycl::event omatcopy2(backend_selector<backend::netlib> selector, transpose tran
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4101,7 +4110,7 @@ sycl::event omatcopy2(backend_selector<backend::netlib> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4111,7 +4120,7 @@ sycl::event omatcopy2(backend_selector<backend::netlib> selector, transpose tran
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4119,16 +4128,16 @@ sycl::event omatcopy2(backend_selector<backend::netlib> selector, transpose tran
 sycl::event imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::netlib> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4136,8 +4145,8 @@ sycl::event imatcopy(backend_selector<backend::netlib> selector, transpose trans
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4145,8 +4154,8 @@ sycl::event imatcopy(backend_selector<backend::netlib> selector, transpose trans
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha,
-                                                           ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                            alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4154,9 +4163,9 @@ sycl::event omatadd(backend_selector<backend::netlib> selector, transpose transa
                     std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                     float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4164,9 +4173,9 @@ sycl::event omatadd(backend_selector<backend::netlib> selector, transpose transa
                     std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                     double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4175,9 +4184,9 @@ sycl::event omatadd(backend_selector<backend::netlib> selector, transpose transa
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4186,9 +4195,9 @@ sycl::event omatadd(backend_selector<backend::netlib> selector, transpose transa
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                  a, lda, beta, b, ldb, c, ldc, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                           dependencies);
     return done;
 }
 
@@ -4197,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4208,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4219,9 +4228,9 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4230,9 +4239,9 @@ sycl::event omatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, a, lda, b, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4240,9 +4249,9 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4250,9 +4259,9 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4261,9 +4270,9 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
 
@@ -4272,8 +4281,8 @@ sycl::event imatcopy_batch(backend_selector<backend::netlib> selector, transpose
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                 alpha, ab, lda, ldb, group_count,
-                                                                 groupsize, dependencies);
+    auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
+                                                                  alpha, ab, lda, ldb, group_count,
+                                                                  groupsize, dependencies);
     return done;
 }
diff --git a/include/oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp b/include/oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp
similarity index 70%
rename from include/oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp
rename to include/oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp
index cb96c9a27..4c6a5f0ea 100644
--- a/include/oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp
+++ b/include/oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_NETLIB_HPP_
-#define _ONEMKL_BLAS_NETLIB_HPP_
+#ifndef _ONEMATH_BLAS_NETLIB_HPP_
+#define _ONEMATH_BLAS_NETLIB_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -29,34 +29,34 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
-using oneapi::mkl::transpose;
-using oneapi::mkl::uplo;
-using oneapi::mkl::side;
-using oneapi::mkl::diag;
-using oneapi::mkl::offset;
+using oneapi::math::transpose;
+using oneapi::math::uplo;
+using oneapi::math::side;
+using oneapi::math::diag;
+using oneapi::math::offset;
 
 namespace blas {
 namespace netlib {
 namespace column_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace column_major
 namespace row_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace row_major
 } //namespace netlib
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BLAS_NETLIB_HPP_
+#endif //_ONEMATH_BLAS_NETLIB_HPP_
diff --git a/include/oneapi/math/blas/detail/onemath_blas_backends.hxx b/include/oneapi/math/blas/detail/onemath_blas_backends.hxx
new file mode 100644
index 000000000..06ea19b75
--- /dev/null
+++ b/include/oneapi/math/blas/detail/onemath_blas_backends.hxx
@@ -0,0 +1,2898 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+// Buffer APIs
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
+                         sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
+                         std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
+                         std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
+                         std::int64_t lda, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
+                         sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
+                         std::int64_t lda, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
+                         float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                         oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                         std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
+                         std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb,
+                         float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
+                         sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void hemm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void hemm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
+                         sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
+                         sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                               float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                               double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void herk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
+                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void herk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                         double alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         double beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                          sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                          sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                          sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
+                          sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                          std::int64_t ldb, std::complex<float> beta,
+                          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                          std::int64_t ldb, std::complex<double> beta,
+                          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                          std::int64_t ldb, float beta, sycl::buffer<std::complex<float>, 1>& c,
+                          std::int64_t ldc);
+
+ONEMATH_EXPORT void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                          std::int64_t ldb, double beta, sycl::buffer<std::complex<double>, 1>& c,
+                          std::int64_t ldc);
+
+ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right,
+                         oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                         oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
+                         std::int64_t ldb);
+
+ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                               std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                               std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, float beta,
+                               sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                               std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                               std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, double beta,
+                               sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                               std::int64_t n, std::complex<float> alpha,
+                               sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                               std::int64_t n, std::complex<double> alpha,
+                               sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                               std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
+                               std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<float, 1>& c, std::int64_t ldc,
+                               std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
+                               std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
+                               std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<double, 1>& c, std::int64_t ldc,
+                               std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
+                               std::int64_t lda, std::int64_t stridea,
+                               sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c,
+                               std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
+                               std::int64_t lda, std::int64_t stridea,
+                               sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c,
+                               std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha,
+                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
+                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t kl, std::int64_t ku,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha,
+                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+                        std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
+                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                        std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::int64_t k, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::int64_t k, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, std::complex<float> beta,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, std::complex<double> beta,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<float>, 1>& a);
+
+ONEMATH_EXPORT void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<double>, 1>& a);
+
+ONEMATH_EXPORT void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a);
+
+ONEMATH_EXPORT void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
+                         std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a);
+
+ONEMATH_EXPORT void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                         sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                         sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                        sycl::buffer<float, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                        sycl::buffer<double, 1>& a, std::int64_t lda);
+
+ONEMATH_EXPORT void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a,
+                         std::int64_t lda);
+
+ONEMATH_EXPORT void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                        sycl::buffer<float, 1>& a);
+
+ONEMATH_EXPORT void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                        sycl::buffer<double, 1>& a);
+
+ONEMATH_EXPORT void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a);
+
+ONEMATH_EXPORT void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                         double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a);
+
+ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
+                         std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
+                         std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
+                         std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
+                         sycl::buffer<float, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
+                         sycl::buffer<double, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                         oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                         std::int64_t n, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void dotc(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& result);
+
+ONEMATH_EXPORT void dotc(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& result);
+
+ONEMATH_EXPORT void dotu(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<float>, 1>& result);
+
+ONEMATH_EXPORT void dotu(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
+                         sycl::buffer<std::complex<double>, 1>& result);
+
+ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                          std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                          std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                          std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                          std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                          sycl::buffer<std::int64_t, 1>& result);
+
+ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, double alpha,
+                         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+                         std::int64_t incy);
+
+ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha,
+                               sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha,
+                               sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
+                               sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                               sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                               sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, float alpha,
+                          sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
+                          sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, double alpha,
+                          sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
+                          sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                          std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
+                          std::int64_t incy);
+
+ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                          std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
+                          std::int64_t incy);
+
+ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, sycl::buffer<float, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                               std::int64_t incx, std::int64_t stridex, sycl::buffer<double, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n,
+                               sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                               std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
+                               std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
+
+ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
+                        sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                        std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
+                        sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer<float, 1>& x,
+                           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
+                           sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
+                        sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& result);
+
+ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& result);
+
+ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
+                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
+                        std::int64_t incy, float c, float s);
+
+ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n,
+                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c,
+                        double s);
+
+ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c,
+                        float s);
+
+ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                        std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c,
+                        double s);
+
+ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& b,
+                         sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s);
+
+ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& b,
+                         sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s);
+
+ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
+                         sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
+                         sycl::buffer<std::complex<float>, 1>& s);
+
+ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
+                         sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
+                         sycl::buffer<std::complex<double>, 1>& s);
+
+ONEMATH_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
+                         sycl::buffer<float, 1>& param);
+
+ONEMATH_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
+                         sycl::buffer<double, 1>& param);
+
+ONEMATH_EXPORT void rotmg(sycl::queue& queue, sycl::buffer<float, 1>& d1,
+                          sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
+                          sycl::buffer<float, 1>& param);
+
+ONEMATH_EXPORT void rotmg(sycl::queue& queue, sycl::buffer<double, 1>& d1,
+                          sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
+                          sycl::buffer<double, 1>& param);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
+                         std::int64_t incx);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha,
+                         sycl::buffer<double, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
+
+ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+                         std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
+                         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n,
+                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
+                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, float beta,
+                               sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, double beta,
+                               sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                               std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, std::complex<float> alpha,
+                               sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
+                               sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, std::complex<double> alpha,
+                               sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::complex<double> beta,
+                               sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, sycl::half beta,
+                               sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
+                               std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                               oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                               std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, float beta, sycl::buffer<std::int32_t, 1>& c,
+                               std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                               oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                               oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                               float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::int64_t batch_size);
+
+ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                               oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                               oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                               double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::int64_t batch_size);
+
+ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                               oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                               oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::int64_t batch_size);
+
+ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                               oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                               oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                               std::int64_t stride_b, std::int64_t batch_size);
+
+ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose transa, oneapi::math::transpose transb,
+                          std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
+                          std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
+                          sycl::buffer<float, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose transa, oneapi::math::transpose transb,
+                          std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
+                          std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb,
+                          double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose transa, oneapi::math::transpose transb,
+                          std::int64_t n, std::int64_t k, std::complex<float> alpha,
+                          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                          std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
+                          std::int64_t ldc);
+
+ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                          oneapi::math::transpose transa, oneapi::math::transpose transb,
+                          std::int64_t n, std::int64_t k, std::complex<double> alpha,
+                          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                          std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
+                          std::int64_t ldc);
+
+ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                              oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                              std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                              sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
+                              sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
+                              sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
+                              sycl::buffer<int32_t, 1>& co);
+
+ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                              oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                              std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                              sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
+                              sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
+                              sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
+                              sycl::buffer<int32_t, 1>& co);
+
+ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                              oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                              std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                              sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
+                              sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
+                              sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
+                              sycl::buffer<int32_t, 1>& co);
+
+ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                              oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                              std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                              sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
+                              sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
+                              sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
+                              sycl::buffer<int32_t, 1>& co);
+
+ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, float alpha,
+                                   sycl::buffer<float, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, double alpha,
+                                   sycl::buffer<double, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                                   std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
+                                   std::int64_t ldb, std::int64_t stride_b,
+                                   std::int64_t batch_size);
+
+ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, float alpha,
+                                   sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb,
+                                   std::int64_t stride, std::int64_t batch_size);
+
+ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, double alpha,
+                                   sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb,
+                                   std::int64_t stride, std::int64_t batch_size);
+
+ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                   sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
+                                   std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
+
+ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                   sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
+                                   std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                  oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
+                                  std::int64_t ldb, std::int64_t stride_b,
+                                  sycl::buffer<float, 1>& c, std::int64_t ldc,
+                                  std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                  oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
+                                  std::int64_t ldb, std::int64_t stride_b,
+                                  sycl::buffer<double, 1>& c, std::int64_t ldc,
+                                  std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                  oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                  std::complex<float> alpha,
+                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, std::complex<float> beta,
+                                  sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                                  std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
+                                  std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                  oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                  std::complex<double> alpha,
+                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                                  std::int64_t stride_a, std::complex<double> beta,
+                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                                  std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
+                                  std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
+
+ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                             std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                             std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, std::complex<float> alpha,
+                             sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                             sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, std::complex<double> alpha,
+                             sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                             sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
+
+ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                              std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                              std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+
+ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                              std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                              std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+
+ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                              std::int64_t n, std::complex<float> alpha,
+                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
+                              std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+
+ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                              std::int64_t n, std::complex<double> alpha,
+                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
+                              std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
+                              std::int64_t ldb, std::int64_t strideb);
+
+ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
+                             std::int64_t lda, std::int64_t ldb);
+
+ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, double alpha, sycl::buffer<double, 1>& ab,
+                             std::int64_t lda, std::int64_t ldb);
+
+ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, std::complex<float> alpha,
+                             sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
+                             std::int64_t ldb);
+
+ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                             std::int64_t n, std::complex<double> alpha,
+                             sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
+                             std::int64_t ldb);
+
+ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                            oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                            float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
+                            sycl::buffer<float, 1>& b, std::int64_t ldb, sycl::buffer<float, 1>& c,
+                            std::int64_t ldc);
+
+ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                            oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                            double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
+                            sycl::buffer<double, 1>& b, std::int64_t ldb,
+                            sycl::buffer<double, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                            oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                            std::int64_t lda, std::complex<float> beta,
+                            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
+                            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
+
+ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                            oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                            std::int64_t lda, std::complex<double> beta,
+                            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
+                            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
+
+// USM APIs
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, float alpha, const float* a, std::int64_t lda,
+                                const float* b, std::int64_t ldb, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, double alpha, const double* a, std::int64_t lda,
+                                const double* b, std::int64_t ldb, double beta, double* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, sycl::half alpha, const sycl::half* a,
+                                std::int64_t lda, const sycl::half* b, std::int64_t ldb,
+                                sycl::half beta, sycl::half* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda,
+                                const sycl::half* b, std::int64_t ldb, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa,
+                                oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda,
+                                const bfloat16* b, std::int64_t ldb, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                                     oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                                     std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                                     const std::int8_t* a, std::int64_t lda, std::int8_t ao,
+                                     const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
+                                     float beta, std::int32_t* c, std::int64_t ldc,
+                                     const std::int32_t* co,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                                     oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                                     std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                                     const std::int8_t* a, std::int64_t lda, std::int8_t ao,
+                                     const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
+                                     float beta, std::int32_t* c, std::int64_t ldc,
+                                     const std::int32_t* co,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                                     oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                                     std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                                     const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
+                                     const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
+                                     float beta, std::int32_t* c, std::int64_t ldc,
+                                     const std::int32_t* co,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                                     oneapi::math::transpose transb, oneapi::math::offset offsetc,
+                                     std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                                     const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
+                                     const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
+                                     float beta, std::int32_t* c, std::int64_t ldc,
+                                     const std::int32_t* co,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                float alpha, const float* a, std::int64_t lda, const float* b,
+                                std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                double alpha, const double* a, std::int64_t lda, const double* b,
+                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                std::complex<double> beta, std::complex<double>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                float alpha, const float* a, std::int64_t lda, float beta, float* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                double alpha, const double* a, std::int64_t lda, double beta,
+                                double* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float> beta, std::complex<float>* c,
+                                std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double> beta,
+                                std::complex<double>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, float* alpha, const float** a,
+                                      std::int64_t* lda, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, double* alpha, const double** a,
+                                      std::int64_t* lda, double* beta, double** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>* beta, std::complex<float>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* k, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>* beta, std::complex<double>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                      oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                      float alpha, const float* a, std::int64_t lda,
+                                      std::int64_t stride_a, float beta, float* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                      oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                      double alpha, const double* a, std::int64_t lda,
+                                      std::int64_t stride_a, double beta, double* c,
+                                      std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                      oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                      std::complex<float> alpha, const std::complex<float>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<float> beta, std::complex<float>* c,
+                                      std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                      oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                      std::complex<double> alpha, const std::complex<double>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<double> beta, std::complex<double>* c,
+                                      std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                float alpha, const std::complex<float>* a, std::int64_t lda,
+                                float beta, std::complex<float>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                double alpha, const std::complex<double>* a, std::int64_t lda,
+                                double beta, std::complex<double>* c, std::int64_t ldc,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 float alpha, const float* a, std::int64_t lda, const float* b,
+                                 std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 double alpha, const double* a, std::int64_t lda, const double* b,
+                                 std::int64_t ldb, double beta, double* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<float> alpha, const std::complex<float>* a,
+                                 std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<double> alpha, const std::complex<double>* a,
+                                 std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double> beta, std::complex<double>* c,
+                                 std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<float> alpha, const std::complex<float>* a,
+                                 std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
+                                 float beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
+                                 std::complex<double> alpha, const std::complex<double>* a,
+                                 std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
+                                 double beta, std::complex<double>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                float alpha, const float* a, std::int64_t lda, float* b,
+                                std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                double alpha, const double* a, std::int64_t lda, double* b,
+                                std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                float alpha, const float* a, std::int64_t lda, float* b,
+                                std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                double alpha, const double* a, std::int64_t lda, double* b,
+                                std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right,
+                                oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                      oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                      float alpha, const float* a, std::int64_t lda,
+                                      std::int64_t stride_a, float* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                      oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                      double alpha, const double* a, std::int64_t lda,
+                                      std::int64_t stride_a, double* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                      oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                      std::complex<float> alpha, const std::complex<float>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<float>* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                      oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
+                                      std::complex<double> alpha, const std::complex<double>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<double>* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, oneapi::math::diag* unit_diag,
+                                      std::int64_t* m, std::int64_t* n, float* alpha,
+                                      const float** a, std::int64_t* lda, float** b,
+                                      std::int64_t* ldb, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, oneapi::math::diag* unit_diag,
+                                      std::int64_t* m, std::int64_t* n, double* alpha,
+                                      const double** a, std::int64_t* lda, double** b,
+                                      std::int64_t* ldb, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, oneapi::math::diag* unit_diag,
+                                      std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      oneapi::math::uplo* upper_lower,
+                                      oneapi::math::transpose* trans, oneapi::math::diag* unit_diag,
+                                      std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, float alpha, const float* a, std::int64_t lda,
+                                const float* x, std::int64_t incx, float beta, float* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, double alpha, const double* a, std::int64_t lda,
+                                const double* x, std::int64_t incx, double beta, double* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                      std::int64_t m, std::int64_t n, float alpha, const float* a,
+                                      std::int64_t lda, std::int64_t stridea, const float* x,
+                                      std::int64_t incx, std::int64_t stridex, float beta, float* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                      std::int64_t m, std::int64_t n, double alpha, const double* a,
+                                      std::int64_t lda, std::int64_t stridea, const double* x,
+                                      std::int64_t incx, std::int64_t stridex, double beta,
+                                      double* y, std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
+    const std::complex<float>* x, std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
+    std::complex<float>* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
+    std::int64_t stridea, const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
+    std::complex<double> beta, std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
+    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                      std::int64_t* m, std::int64_t* n, float* alpha,
+                                      const float** a, std::int64_t* lda, const float** x,
+                                      std::int64_t* incx, float* beta, float** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                      std::int64_t* m, std::int64_t* n, double* alpha,
+                                      const double** a, std::int64_t* lda, const double** x,
+                                      std::int64_t* incx, double* beta, double** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                      std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      const std::complex<float>** x, std::int64_t* incx,
+                                      std::complex<float>* beta, std::complex<float>** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                      std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      const std::complex<double>** x, std::int64_t* incx,
+                                      std::complex<double>* beta, std::complex<double>** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      std::int64_t m, std::int64_t n, const float* a,
+                                      std::int64_t lda, std::int64_t stridea, const float* x,
+                                      std::int64_t incx, std::int64_t stridex, float* c,
+                                      std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      std::int64_t m, std::int64_t n, const double* a,
+                                      std::int64_t lda, std::int64_t stridea, const double* x,
+                                      std::int64_t incx, std::int64_t stridex, double* c,
+                                      std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      std::int64_t m, std::int64_t n, const std::complex<float>* a,
+                                      std::int64_t lda, std::int64_t stridea,
+                                      const std::complex<float>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<float>* c,
+                                      std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                                      std::int64_t m, std::int64_t n, const std::complex<double>* a,
+                                      std::int64_t lda, std::int64_t stridea,
+                                      const std::complex<double>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<double>* c,
+                                      std::int64_t ldc, std::int64_t stridec,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      std::int64_t* m, std::int64_t* n, const float** a,
+                                      std::int64_t* lda, const float** x, std::int64_t* incx,
+                                      float** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      std::int64_t* m, std::int64_t* n, const double** a,
+                                      std::int64_t* lda, const double** x, std::int64_t* incx,
+                                      double** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      std::int64_t* m, std::int64_t* n,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      const std::complex<float>** x, std::int64_t* incx,
+                                      std::complex<float>** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                                      std::int64_t* m, std::int64_t* n,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      const std::complex<double>** x, std::int64_t* incx,
+                                      std::complex<double>** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha,
+                                const float* a, std::int64_t lda, const float* x, std::int64_t incx,
+                                float beta, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
+                                const double* a, std::int64_t lda, const double* x,
+                                std::int64_t incx, double beta, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
+                                std::int64_t n, std::int64_t kl, std::int64_t ku,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha,
+                               const float* x, std::int64_t incx, const float* y, std::int64_t incy,
+                               float* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
+                               const double* x, std::int64_t incx, const double* y,
+                               std::int64_t incy, double* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::int64_t k, std::complex<float> alpha,
+                                const std::complex<float>* a, std::int64_t lda,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::int64_t k, std::complex<double> alpha,
+                                const std::complex<double>* a, std::int64_t lda,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               float alpha, const std::complex<float>* x, std::int64_t incx,
+                               std::complex<float>* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               double alpha, const std::complex<double>* x, std::int64_t incx,
+                               std::complex<double>* a, std::int64_t lda,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* a,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* a,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double> beta, std::complex<double>* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               float alpha, const std::complex<float>* x, std::int64_t incx,
+                               std::complex<float>* a,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               double alpha, const std::complex<double>* x, std::int64_t incx,
+                               std::complex<double>* a,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<float> alpha, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::complex<double> alpha, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::int64_t k, float alpha, const float* a, std::int64_t lda,
+                                const float* x, std::int64_t incx, float beta, float* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                std::int64_t k, double alpha, const double* a, std::int64_t lda,
+                                const double* x, std::int64_t incx, double beta, double* y,
+                                std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                float alpha, const float* a, std::int64_t lda, const float* x,
+                                std::int64_t incx, float beta, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                double alpha, const double* a, std::int64_t lda, const double* x,
+                                std::int64_t incx, double beta, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               float alpha, const float* x, std::int64_t incx, float* a,
+                               std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               double alpha, const double* x, std::int64_t incx, double* a,
+                               std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                float alpha, const float* x, std::int64_t incx, const float* y,
+                                std::int64_t incy, float* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                double alpha, const double* x, std::int64_t incx, const double* y,
+                                std::int64_t incy, double* a, std::int64_t lda,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                float alpha, const float* a, const float* x, std::int64_t incx,
+                                float beta, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                double alpha, const double* a, const double* x, std::int64_t incx,
+                                double beta, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               float alpha, const float* x, std::int64_t incx, float* a,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                               double alpha, const double* x, std::int64_t incx, double* a,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                float alpha, const float* x, std::int64_t incx, const float* y,
+                                std::int64_t incy, float* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
+                                double alpha, const double* x, std::int64_t incx, const double* y,
+                                std::int64_t incy, double* a,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
+                                float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
+                                double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
+                                float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
+                                double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, std::int64_t k, const std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const float* a, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const double* a, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<float>* a,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<double>* a,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const float* a, float* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const double* a, double* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<float>* a,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<double>* a,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const float* a, std::int64_t lda, float* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const double* a, std::int64_t lda, double* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const float* a, std::int64_t lda, float* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const double* a, std::int64_t lda, double* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                oneapi::math::transpose trans, oneapi::math::diag unit_diag,
+                                std::int64_t n, const std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
+                                std::complex<float>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
+                                std::complex<double>* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                 std::int64_t incx, std::int64_t* result,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x,
+                                std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x,
+                                std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x,
+                                std::int64_t incx, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
+                                std::int64_t incx, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                                const std::complex<float>* x, std::int64_t incx,
+                                std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                                const std::complex<double>* x, std::int64_t incx,
+                                std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha,
+                                      const float** x, std::int64_t* incx, float** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha,
+                                      const double** x, std::int64_t* incx, double** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n,
+                                      std::complex<float>* alpha, const std::complex<float>** x,
+                                      std::int64_t* incx, std::complex<float>** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n,
+                                      std::complex<double>* alpha, const std::complex<double>** x,
+                                      std::int64_t* incx, std::complex<double>** y,
+                                      std::int64_t* incy, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha,
+                                      const float* x, std::int64_t incx, std::int64_t stridex,
+                                      float* y, std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha,
+                                      const double* x, std::int64_t incx, std::int64_t stridex,
+                                      double* y, std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                                      const std::complex<float>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<float>* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n,
+                                      std::complex<double> alpha, const std::complex<double>* x,
+                                      std::int64_t incx, std::int64_t stridex,
+                                      std::complex<double>* y, std::int64_t incy,
+                                      std::int64_t stridey, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x,
+                                 std::int64_t incx, const float beta, float* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
+                                 std::int64_t incx, const double beta, double* y, std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                                 const std::complex<float>* x, std::int64_t incx,
+                                 const std::complex<float> beta, std::complex<float>* y,
+                                 std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                                 const std::complex<double>* x, std::int64_t incx,
+                                 const std::complex<double> beta, std::complex<double>* y,
+                                 std::int64_t incy,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x,
+                                std::int64_t incx, float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x,
+                                std::int64_t incx, double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                std::int64_t incx, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                std::int64_t incx, std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x,
+                                      std::int64_t* incx, float** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x,
+                                      std::int64_t* incx, double** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n,
+                                      const std::complex<float>** x, std::int64_t* incx,
+                                      std::complex<float>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n,
+                                      const std::complex<double>** x, std::int64_t* incx,
+                                      std::complex<double>** y, std::int64_t* incy,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x,
+                                      std::int64_t incx, std::int64_t stridex, float* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x,
+                                      std::int64_t incx, std::int64_t stridex, double* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n,
+                                      const std::complex<float>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<float>* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n,
+                                      const std::complex<double>* x, std::int64_t incx,
+                                      std::int64_t stridex, std::complex<double>* y,
+                                      std::int64_t incy, std::int64_t stridey,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x,
+                               std::int64_t incx, const float* y, std::int64_t incy, float* result,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x,
+                               std::int64_t incx, const double* y, std::int64_t incy,
+                               double* result, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x,
+                                  std::int64_t incx, const float* y, std::int64_t incy,
+                                  float* result, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x,
+                               std::int64_t incx, const float* y, std::int64_t incy, double* result,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
+                                std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
+                                std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x,
+                                std::int64_t incx, float* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x,
+                                std::int64_t incx, double* result,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<float>* x,
+                               std::int64_t incx, std::complex<float>* y, std::int64_t incy,
+                               float c, float s, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<double>* x,
+                               std::int64_t incx, std::complex<double>* y, std::int64_t incy,
+                               double c, double s,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
+                               float* y, std::int64_t incy, float c, float s,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
+                               double* y, std::int64_t incy, double c, double s,
+                               const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, std::complex<float>* a, std::complex<float>* b,
+                                float* c, std::complex<float>* s,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, std::complex<double>* a,
+                                std::complex<double>* b, double* c, std::complex<double>* s,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
+                                float* y, std::int64_t incy, float* param,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
+                                double* y, std::int64_t incy, double* param,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1,
+                                 float* param, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1,
+                                 double* param, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x,
+                                std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha,
+                                std::complex<float>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha,
+                                std::complex<double>* x, std::int64_t incx,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
+                                float* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
+                                double* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<float>* x,
+                                std::int64_t incx, std::complex<float>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<double>* x,
+                                std::int64_t incx, std::complex<double>* y, std::int64_t incy,
+                                const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const float** a, std::int64_t* lda, const float** b,
+                                      std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, double* alpha,
+                                      const double** a, std::int64_t* lda, const double** b,
+                                      std::int64_t* ldb, double* beta, double** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
+                                      const std::complex<float>** a, std::int64_t* lda,
+                                      const std::complex<float>** b, std::int64_t* ldb,
+                                      std::complex<float>* beta, std::complex<float>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
+                                      const std::complex<double>** a, std::int64_t* lda,
+                                      const std::complex<double>** b, std::int64_t* ldb,
+                                      std::complex<double>* beta, std::complex<double>** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, sycl::half* alpha,
+                                      const sycl::half** a, std::int64_t* lda, const sycl::half** b,
+                                      std::int64_t* ldb, sycl::half* beta, sycl::half** c,
+                                      std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const sycl::half** a, std::int64_t* lda, const sycl::half** b,
+                                      std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
+                                      std::int64_t group_count, std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const std::int8_t** a, std::int64_t* lda,
+                                      const std::int8_t** b, std::int64_t* ldb, float* beta,
+                                      float** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                                      oneapi::math::transpose* transb, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float* alpha,
+                                      const std::int8_t** a, std::int64_t* lda,
+                                      const std::int8_t** b, std::int64_t* ldb, float* beta,
+                                      std::int32_t** c, std::int64_t* ldc, std::int64_t group_count,
+                                      std::int64_t* group_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha, const float* a,
+                                      std::int64_t lda, std::int64_t stride_a, const float* b,
+                                      std::int64_t ldb, std::int64_t stride_b, float beta, float* c,
+                                      std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, double alpha, const double* a,
+                                      std::int64_t lda, std::int64_t stride_a, const double* b,
+                                      std::int64_t ldb, std::int64_t stride_b, double beta,
+                                      double* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(
+    sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
+    const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
+    const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
+    std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(
+    sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
+    const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
+    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
+    std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
+    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, sycl::half alpha,
+                                      const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
+                                      const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
+                                      sycl::half beta, sycl::half* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
+                                      const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
+                                      const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
+                                      std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                      oneapi::math::transpose transb, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float alpha,
+                                      const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
+                                      const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
+                                      float beta, std::int32_t* c, std::int64_t ldc,
+                                      std::int64_t stride_c, std::int64_t batch_size,
+                                      const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose transa, oneapi::math::transpose transb,
+                                 std::int64_t n, std::int64_t k, float alpha, const float* a,
+                                 std::int64_t lda, const float* b, std::int64_t ldb, float beta,
+                                 float* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose transa, oneapi::math::transpose transb,
+                                 std::int64_t n, std::int64_t k, double alpha, const double* a,
+                                 std::int64_t lda, const double* b, std::int64_t ldb, double beta,
+                                 double* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose transa, oneapi::math::transpose transb,
+                                 std::int64_t n, std::int64_t k, std::complex<float> alpha,
+                                 const std::complex<float>* a, std::int64_t lda,
+                                 const std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose transa, oneapi::math::transpose transb,
+                                 std::int64_t n, std::int64_t k, std::complex<double> alpha,
+                                 const std::complex<double>* a, std::int64_t lda,
+                                 const std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double> beta, std::complex<double>* c,
+                                 std::int64_t ldc,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, float alpha,
+                                          const float* a, std::int64_t lda, std::int64_t stride_a,
+                                          float* b, std::int64_t ldb, std::int64_t stride_b,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, double alpha,
+                                          const double* a, std::int64_t lda, std::int64_t stride_a,
+                                          double* b, std::int64_t ldb, std::int64_t stride_b,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                          const std::complex<float>* a, std::int64_t lda,
+                                          std::int64_t stride_a, std::complex<float>* b,
+                                          std::int64_t ldb, std::int64_t stride_b,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n,
+                                          std::complex<double> alpha, const std::complex<double>* a,
+                                          std::int64_t lda, std::int64_t stride_a,
+                                          std::complex<double>* b, std::int64_t ldb,
+                                          std::int64_t stride_b, std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, float alpha, float* ab,
+                                          std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, double alpha, double* ab,
+                                          std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                          std::complex<float>* ab, std::int64_t lda,
+                                          std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                          std::int64_t m, std::int64_t n,
+                                          std::complex<double> alpha, std::complex<double>* ab,
+                                          std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                                          std::int64_t batch_size,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                         oneapi::math::transpose transb, std::int64_t m,
+                                         std::int64_t n, float alpha, const float* a,
+                                         std::int64_t lda, std::int64_t stride_a, float beta,
+                                         const float* b, std::int64_t ldb, std::int64_t stride_b,
+                                         float* c, std::int64_t ldc, std::int64_t stride_c,
+                                         std::int64_t batch_size,
+                                         const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                                         oneapi::math::transpose transb, std::int64_t m,
+                                         std::int64_t n, double alpha, const double* a,
+                                         std::int64_t lda, std::int64_t stride_a, double beta,
+                                         const double* b, std::int64_t ldb, std::int64_t stride_b,
+                                         double* c, std::int64_t ldc, std::int64_t stride_c,
+                                         std::int64_t batch_size,
+                                         const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd_batch(
+    sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+    std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
+    std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, const std::complex<float>* b,
+    std::int64_t ldb, std::int64_t stride_b, std::complex<float>* c, std::int64_t ldc,
+    std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd_batch(
+    sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+    std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
+    std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
+    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<double>* c,
+    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
+    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, float alpha, const float* a,
+                                    std::int64_t lda, float* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, double alpha, const double* a,
+                                    std::int64_t lda, double* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                    const std::complex<float>* a, std::int64_t lda,
+                                    std::complex<float>* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                    const std::complex<double>* a, std::int64_t lda,
+                                    std::complex<double>* b, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans,
+                                     std::int64_t m, std::int64_t n, float alpha, const float* a,
+                                     std::int64_t lda, std::int64_t stridea, float* b,
+                                     std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans,
+                                     std::int64_t m, std::int64_t n, double alpha, const double* a,
+                                     std::int64_t lda, std::int64_t stridea, double* b,
+                                     std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans,
+                                     std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                     const std::complex<float>* a, std::int64_t lda,
+                                     std::int64_t stridea, std::complex<float>* b, std::int64_t ldb,
+                                     std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans,
+                                     std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                     const std::complex<double>* a, std::int64_t lda,
+                                     std::int64_t stridea, std::complex<double>* b,
+                                     std::int64_t ldb, std::int64_t strideb,
+                                     const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, float alpha, float* ab,
+                                    std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, double alpha, double* ab,
+                                    std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
+                                    std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans,
+                                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
+                                    std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
+                                    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                   float alpha, const float* a, std::int64_t lda, float beta,
+                                   const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                   double alpha, const double* a, std::int64_t lda, double beta,
+                                   const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                   std::complex<float> alpha, const std::complex<float>* a,
+                                   std::int64_t lda, std::complex<float> beta,
+                                   const std::complex<float>* b, std::int64_t ldb,
+                                   std::complex<float>* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa,
+                                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                                   std::complex<double> alpha, const std::complex<double>* a,
+                                   std::int64_t lda, std::complex<double> beta,
+                                   const std::complex<double>* b, std::int64_t ldb,
+                                   std::complex<double>* c, std::int64_t ldc,
+                                   const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, float* alpha, const float** a,
+                                          std::int64_t* lda, float** b, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, double* alpha, const double** a,
+                                          std::int64_t* lda, double** b, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, std::complex<float>* alpha,
+                                          const std::complex<float>** a, std::int64_t* lda,
+                                          std::complex<float>** b, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, std::complex<double>* alpha,
+                                          const std::complex<double>** a, std::int64_t* lda,
+                                          std::complex<double>** b, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, float* alpha, float** ab,
+                                          std::int64_t* lda, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, double* alpha, double** ab,
+                                          std::int64_t* lda, std::int64_t* ldb,
+                                          std::int64_t group_count, std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, std::complex<float>* alpha,
+                                          std::complex<float>** ab, std::int64_t* lda,
+                                          std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
+                                          std::int64_t* n, std::complex<double>* alpha,
+                                          std::complex<double>** ab, std::int64_t* lda,
+                                          std::int64_t* ldb, std::int64_t group_count,
+                                          std::int64_t* groupsize,
+                                          const std::vector<sycl::event>& dependencies = {});
diff --git a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hpp b/include/oneapi/math/blas/detail/portblas/blas_ct.hpp
similarity index 85%
rename from include/oneapi/mkl/blas/detail/portblas/blas_ct.hpp
rename to include/oneapi/math/blas/detail/portblas/blas_ct.hpp
index 6d3b0b2c2..c79dc2dbd 100644
--- a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/portblas/blas_ct.hpp
@@ -28,13 +28,13 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -51,7 +51,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_PORTBLAS_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx b/include/oneapi/math/blas/detail/portblas/blas_ct.hxx
similarity index 73%
rename from include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx
rename to include/oneapi/math/blas/detail/portblas/blas_ct.hxx
index 6f56157ba..2f3694c6e 100644
--- a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/portblas/blas_ct.hxx
@@ -22,114 +22,114 @@
 void herk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void spr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                            a);
+    oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a);
 }
 
 void spr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                            a);
+    oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -138,9 +138,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -149,9 +149,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -160,9 +160,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -171,9 +171,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -182,9 +182,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -193,9 +193,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -204,9 +204,9 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -215,57 +215,57 @@ void gemm_batch(backend_selector<backend::portblas> selector, transpose transa,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                   c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                    alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                    c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                    alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                    batch_size);
 }
 
 void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                    alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                    batch_size);
 }
 
 void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -273,9 +273,9 @@ void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                    alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                    batch_size);
 }
 
 void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -283,186 +283,186 @@ void syrk_batch(backend_selector<backend::portblas> selector, uplo upper_lower,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                    alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                    batch_size);
 }
 
 void her2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a, lda);
+    oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a, lda);
 }
 
 void her2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a, lda);
+    oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                             x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                             x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::portblas> selector, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::portblas> selector, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                   y, incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx,
+                                                    stridex, y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                   y, incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx,
+                                                    stridex, y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                   y, incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx,
+                                                    stridex, y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                   y, incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx,
+                                                    stridex, y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                              incy);
+    oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                               incy);
 }
 
 void axpby(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                              incy);
+    oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                               incy);
 }
 
 void axpby(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                              incy);
+    oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                               incy);
 }
 
 void axpby(backend_selector<backend::portblas> selector, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                              incy);
+    oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                               incy);
 }
 
 void sdsdot(backend_selector<backend::portblas> selector, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
-                                               result);
+    oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                                result);
 }
 
 void gerc(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                             lda);
+    oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy,
+                                              a, lda);
 }
 
 void gerc(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                             lda);
+    oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy,
+                                              a, lda);
 }
 
 void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -470,8 +470,8 @@ void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -479,40 +479,40 @@ void syr2k(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                              incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                              incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                              incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                              incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -520,9 +520,9 @@ void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, s
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                   stridea, x, incx, stridex, beta, y, incy,
-                                                   stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                    lda, stridea, x, incx, stridex, beta, y, incy,
+                                                    stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -530,9 +530,9 @@ void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, s
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                   stridea, x, incx, stridex, beta, y, incy,
-                                                   stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                    lda, stridea, x, incx, stridex, beta, y, incy,
+                                                    stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -541,9 +541,9 @@ void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, s
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                   stridea, x, incx, stridex, beta, y, incy,
-                                                   stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                    lda, stridea, x, incx, stridex, beta, y, incy,
+                                                    stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -552,9 +552,9 @@ void gemv_batch(backend_selector<backend::portblas> selector, transpose trans, s
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                   stridea, x, incx, stridex, beta, y, incy,
-                                                   stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                    lda, stridea, x, incx, stridex, beta, y, incy,
+                                                    stridey, batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, std::int64_t m,
@@ -562,9 +562,9 @@ void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, s
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                   stridea, x, incx, stridex, c, ldc, stridec,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                    stridea, x, incx, stridex, c, ldc, stridec,
+                                                    batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, std::int64_t m,
@@ -572,9 +572,9 @@ void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, s
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                   stridea, x, incx, stridex, c, ldc, stridec,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                    stridea, x, incx, stridex, c, ldc, stridec,
+                                                    batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, std::int64_t m,
@@ -582,9 +582,9 @@ void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, s
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                   stridea, x, incx, stridex, c, ldc, stridec,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                    stridea, x, incx, stridex, c, ldc, stridec,
+                                                    batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, std::int64_t m,
@@ -592,89 +592,89 @@ void dgmm_batch(backend_selector<backend::portblas> selector, side left_right, s
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
                 std::int64_t stridec, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                   stridea, x, incx, stridex, c, ldc, stridec,
-                                                   batch_size);
+    oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                    stridea, x, incx, stridex, c, ldc, stridec,
+                                                    batch_size);
 }
 
 void her(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a, lda);
 }
 
 void her(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a, lda);
 }
 
 void hpr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                            a);
+    oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a);
 }
 
 void hpr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                            a);
+    oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a);
 }
 
 void iamin(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::portblas> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::portblas> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                              incx, beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                              incx, beta, y, incy);
 }
 
 void spmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                              incx, beta, y, incy);
 }
 
 void spmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                              incx, beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -682,9 +682,9 @@ void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, t
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                  co);
+    oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                   n, k, alpha, a, lda, ao, b, ldb, bo, beta, c,
+                                                   ldc, co);
 }
 
 void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -692,9 +692,9 @@ void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, t
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
                std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                  co);
+    oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                   n, k, alpha, a, lda, ao, b, ldb, bo, beta, c,
+                                                   ldc, co);
 }
 
 void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -702,9 +702,9 @@ void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, t
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                  co);
+    oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                   n, k, alpha, a, lda, ao, b, ldb, bo, beta, c,
+                                                   ldc, co);
 }
 
 void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -712,85 +712,85 @@ void gemm_bias(backend_selector<backend::portblas> selector, transpose transa, t
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                  co);
+    oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                   n, k, alpha, a, lda, ao, b, ldb, bo, beta, c,
+                                                   ldc, co);
 }
 
 void swap(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                             lda);
+    oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy,
+                                              a, lda);
 }
 
 void geru(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                             lda);
+    oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy,
+                                              a, lda);
 }
 
 void nrm2(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -798,8 +798,8 @@ void gemm(backend_selector<backend::portblas> selector, transpose transa, transp
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -807,106 +807,106 @@ void gemm(backend_selector<backend::portblas> selector, transpose transa, transp
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a, lda);
+    oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a, lda);
 }
 
 void syr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a, lda);
+    oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a, lda);
 }
 
 void ger(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n, float alpha,
          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void ger(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void trsm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -914,8 +914,8 @@ void hemm(backend_selector<backend::portblas> selector, side left_right, uplo up
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -923,40 +923,40 @@ void hemm(backend_selector<backend::portblas> selector, side left_right, uplo up
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a);
+    oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a);
 }
 
 void hpr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a);
+    oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a);
 }
 
 void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                             lda, x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                             lda, x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -964,8 +964,8 @@ void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::in
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                             lda, x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -973,52 +973,52 @@ void gbmv(backend_selector<backend::portblas> selector, transpose trans, std::in
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                             lda, x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1026,8 +1026,8 @@ void symm(backend_selector<backend::portblas> selector, side left_right, uplo up
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1035,222 +1035,222 @@ void symm(backend_selector<backend::portblas> selector, side left_right, uplo up
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                             alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                              alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a, lda);
 }
 
 void syr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
-    oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                            lda);
+    oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             a, lda);
 }
 
 void trmm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                             unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                              unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::portblas> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::portblas> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, x, incx);
 }
 
 void trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                   incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                    incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                   incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                    incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                   incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                    incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::portblas> selector, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                   incy, stridey, batch_size);
+    oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                    incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda,
+                                              x, incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda,
+                                              x, incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                               k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                               k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, transpose transa,
@@ -1258,8 +1258,8 @@ void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                               k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, transpose transa,
@@ -1267,140 +1267,140 @@ void gemmt(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                               k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::portblas> selector, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                             x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                             x, incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                              lda, x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                             k, a, lda, x, incx);
+    oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag,
+                                              n, k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a);
+    oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a);
 }
 
 void spr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
-                                             y, incy, a);
+    oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                              y, incy, a);
 }
 
 void iamax(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::portblas> selector, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::portblas> selector, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::portblas> selector, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1408,9 +1408,9 @@ void trsm_batch(backend_selector<backend::portblas> selector, side left_right, u
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                   b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                    trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                    b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1418,9 +1418,9 @@ void trsm_batch(backend_selector<backend::portblas> selector, side left_right, u
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
                 std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                   b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                    trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                    b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1428,9 +1428,9 @@ void trsm_batch(backend_selector<backend::portblas> selector, side left_right, u
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                   b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                    trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                    b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::portblas> selector, side left_right, uplo upper_lower,
@@ -1438,9 +1438,9 @@ void trsm_batch(backend_selector<backend::portblas> selector, side left_right, u
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                   b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                    trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                    b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -1448,8 +1448,8 @@ void her2k(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
@@ -1457,60 +1457,60 @@ void her2k(backend_selector<backend::portblas> selector, uplo upper_lower, trans
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                              a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
+                                               alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::portblas> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::portblas> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::portblas> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::portblas> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx, float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda,
+                                              x, incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    oneapi::mkl::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                             incx, beta, y, incy);
+    oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda,
+                                              x, incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -1518,8 +1518,8 @@ void omatcopy_batch(backend_selector<backend::portblas> selector, transpose tran
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
@@ -1527,38 +1527,38 @@ void omatcopy_batch(backend_selector<backend::portblas> selector, transpose tran
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                       lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                       lda, ldb, stride, batch_size);
+    oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha,
+                                                        ab, lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                       lda, ldb, stride, batch_size);
+    oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha,
+                                                        ab, lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                       lda, ldb, stride, batch_size);
+    oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha,
+                                                        ab, lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                       lda, ldb, stride, batch_size);
+    oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha,
+                                                        ab, lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1566,9 +1566,9 @@ void omatadd_batch(backend_selector<backend::portblas> selector, transpose trans
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                      alpha, a, lda, stride_a, beta, b, ldb,
-                                                      stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                       alpha, a, lda, stride_a, beta, b, ldb,
+                                                       stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1576,9 +1576,9 @@ void omatadd_batch(backend_selector<backend::portblas> selector, transpose trans
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                      alpha, a, lda, stride_a, beta, b, ldb,
-                                                      stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                       alpha, a, lda, stride_a, beta, b, ldb,
+                                                       stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1587,9 +1587,9 @@ void omatadd_batch(backend_selector<backend::portblas> selector, transpose trans
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                      alpha, a, lda, stride_a, beta, b, ldb,
-                                                      stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                       alpha, a, lda, stride_a, beta, b, ldb,
+                                                       stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1599,113 +1599,113 @@ void omatadd_batch(backend_selector<backend::portblas> selector, transpose trans
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                      alpha, a, lda, stride_a, beta, b, ldb,
-                                                      stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                       alpha, a, lda, stride_a, beta, b, ldb,
+                                                       stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 b, ldb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  b, ldb);
 }
 
 void omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 b, ldb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  b, ldb);
 }
 
 void omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 b, ldb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  b, ldb);
 }
 
 void omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 b, ldb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  b, ldb);
 }
 
 void omatcopy2(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, b, ldb, strideb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, b, ldb, strideb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, b, ldb, strideb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, b, ldb, strideb);
+    oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                 ldb);
+    oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                  ldb);
 }
 
 void imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                 ldb);
+    oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                  ldb);
 }
 
 void imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                 ldb);
+    oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                  ldb);
 }
 
 void imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                 ldb);
+    oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                  ldb);
 }
 
 void omatadd(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                a, lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                 a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                a, lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                 a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1713,8 +1713,8 @@ void omatadd(backend_selector<backend::portblas> selector, transpose transa, tra
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                a, lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                 a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::portblas> selector, transpose transa, transpose transb,
@@ -1722,8 +1722,8 @@ void omatadd(backend_selector<backend::portblas> selector, transpose transa, tra
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
-                                                a, lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                 a, lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1731,7 +1731,7 @@ void omatadd(backend_selector<backend::portblas> selector, transpose transa, tra
 sycl::event syr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2(
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2(
         selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
@@ -1740,61 +1740,61 @@ sycl::event syr2(backend_selector<backend::portblas> selector, uplo upper_lower,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2(
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2(
         selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                          dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1802,7 +1802,7 @@ sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1811,7 +1811,7 @@ sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1820,7 +1820,7 @@ sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1828,16 +1828,16 @@ sycl::event trmv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1845,8 +1845,8 @@ sycl::event tpmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -1854,24 +1854,24 @@ sycl::event tpmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, dependencies);
     return done;
 }
 
@@ -1880,7 +1880,7 @@ sycl::event hpmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::hpmv(
         selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -1890,7 +1890,7 @@ sycl::event hpmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::hpmv(
         selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -1899,7 +1899,7 @@ sycl::event syrk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1908,7 +1908,7 @@ sycl::event syrk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1918,7 +1918,7 @@ sycl::event syrk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1928,7 +1928,7 @@ sycl::event syrk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1938,7 +1938,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo* upper
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1949,7 +1949,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo* upper
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1961,7 +1961,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo* upper
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1973,7 +1973,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo* upper
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1984,7 +1984,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo upper_
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1995,7 +1995,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo upper_
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2007,7 +2007,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo upper_
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2019,7 +2019,7 @@ sycl::event syrk_batch(backend_selector<backend::portblas> selector, uplo upper_
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -2029,7 +2029,7 @@ sycl::event her2(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her2(
+    auto done = oneapi::math::blas::portblas::MAJOR::her2(
         selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
@@ -2038,7 +2038,7 @@ sycl::event her2(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her2(
+    auto done = oneapi::math::blas::portblas::MAJOR::her2(
         selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
@@ -2049,8 +2049,8 @@ sycl::event hbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                 lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                  lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2060,16 +2060,16 @@ sycl::event hbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                 lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                  lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::portblas> selector, std::int64_t n,
                 std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                 std::int64_t incy, float c, float s, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
-                                                        c, s, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                         c, s, dependencies);
     return done;
 }
 
@@ -2077,40 +2077,40 @@ sycl::event rot(backend_selector<backend::portblas> selector, std::int64_t n,
                 std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                 std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
-                                                        c, s, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                         c, s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::portblas> selector, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
-                                                        c, s, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                         c, s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::portblas> selector, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
-                                                        c, s, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                         c, s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                         incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx,
+                                                          y, incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                         incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx,
+                                                          y, incy, dependencies);
     return done;
 }
 
@@ -2118,8 +2118,8 @@ sycl::event axpy(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                         incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx,
+                                                          y, incy, dependencies);
     return done;
 }
 
@@ -2127,8 +2127,8 @@ sycl::event axpy(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                         incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx,
+                                                          y, incy, dependencies);
     return done;
 }
 
@@ -2136,7 +2136,7 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2145,7 +2145,7 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2155,7 +2155,7 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2165,7 +2165,7 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2174,9 +2174,9 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                               incx, stridex, y, incy, stridey,
-                                                               batch_size, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                                incx, stridex, y, incy, stridey,
+                                                                batch_size, dependencies);
     return done;
 }
 
@@ -2184,9 +2184,9 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                               incx, stridex, y, incy, stridey,
-                                                               batch_size, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                                incx, stridex, y, incy, stridey,
+                                                                batch_size, dependencies);
     return done;
 }
 
@@ -2195,9 +2195,9 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                               incx, stridex, y, incy, stridey,
-                                                               batch_size, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                                incx, stridex, y, incy, stridey,
+                                                                batch_size, dependencies);
     return done;
 }
 
@@ -2206,25 +2206,25 @@ sycl::event axpy_batch(backend_selector<backend::portblas> selector, std::int64_
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                               incx, stridex, y, incy, stridey,
-                                                               batch_size, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                                incx, stridex, y, incy, stridey,
+                                                                batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::portblas> selector, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                          beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                           beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::portblas> selector, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                          beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                           beta, y, incy, dependencies);
     return done;
 }
 
@@ -2232,8 +2232,8 @@ sycl::event axpby(backend_selector<backend::portblas> selector, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                          beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                           beta, y, incy, dependencies);
     return done;
 }
 
@@ -2241,8 +2241,8 @@ sycl::event axpby(backend_selector<backend::portblas> selector, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                          beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                           beta, y, incy, dependencies);
     return done;
 }
 
@@ -2250,8 +2250,8 @@ sycl::event gerc(backend_selector<backend::portblas> selector, std::int64_t m, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                         y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x,
+                                                          incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2259,8 +2259,8 @@ sycl::event gerc(backend_selector<backend::portblas> selector, std::int64_t m, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                         y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x,
+                                                          incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2268,9 +2268,9 @@ sycl::event syr2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                   const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
@@ -2278,9 +2278,9 @@ sycl::event syr2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                   const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
@@ -2289,9 +2289,9 @@ sycl::event syr2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
@@ -2300,9 +2300,9 @@ sycl::event syr2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
@@ -2310,7 +2310,7 @@ sycl::event gemv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2319,7 +2319,7 @@ sycl::event gemv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2329,7 +2329,7 @@ sycl::event gemv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2339,7 +2339,7 @@ sycl::event gemv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2350,7 +2350,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose t
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2362,7 +2362,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose t
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2375,7 +2375,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose t
                        std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2388,7 +2388,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose t
                        std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2399,7 +2399,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose*
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2410,7 +2410,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose*
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2422,7 +2422,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose*
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2435,7 +2435,7 @@ sycl::event gemv_batch(backend_selector<backend::portblas> selector, transpose*
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2446,7 +2446,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2457,7 +2457,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2469,7 +2469,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t incx, std::int64_t stridex, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2481,7 +2481,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t incx, std::int64_t stridex, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2492,7 +2492,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side* left_
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2503,7 +2503,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side* left_
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2514,7 +2514,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2525,7 +2525,7 @@ sycl::event dgmm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2535,8 +2535,8 @@ sycl::event her(backend_selector<backend::portblas> selector, uplo upper_lower,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -2544,56 +2544,56 @@ sycl::event her(backend_selector<backend::portblas> selector, uplo upper_lower,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::portblas> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::portblas> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
@@ -2603,7 +2603,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2615,7 +2615,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2627,7 +2627,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2639,7 +2639,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2651,7 +2651,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2663,7 +2663,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2675,7 +2675,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2688,7 +2688,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose*
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2701,7 +2701,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
                        std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2713,7 +2713,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2725,7 +2725,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2737,7 +2737,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2749,7 +2749,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2761,7 +2761,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2774,7 +2774,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        std::int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2787,7 +2787,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
                        std::int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2796,7 +2796,7 @@ sycl::event gemm_batch(backend_selector<backend::portblas> selector, transpose t
 sycl::event spmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::spmv(
         selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2804,7 +2804,7 @@ sycl::event spmv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event spmv(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::spmv(
         selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -2812,32 +2812,32 @@ sycl::event spmv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event swap(backend_selector<backend::portblas> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::portblas> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::portblas> selector, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
@@ -2845,8 +2845,8 @@ sycl::event geru(backend_selector<backend::portblas> selector, std::int64_t m, s
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                         y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x,
+                                                          incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2854,38 +2854,38 @@ sycl::event geru(backend_selector<backend::portblas> selector, std::int64_t m, s
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                         y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x,
+                                                          incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
@@ -2893,9 +2893,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a,
                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2903,9 +2903,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a,
                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2914,9 +2914,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2925,9 +2925,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2936,9 +2936,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2946,9 +2946,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2956,9 +2956,9 @@ sycl::event gemm(backend_selector<backend::portblas> selector, transpose transa,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a,
                  std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2968,7 +2968,7 @@ sycl::event gemm_bias(backend_selector<backend::portblas> selector, transpose tr
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2980,7 +2980,7 @@ sycl::event gemm_bias(backend_selector<backend::portblas> selector, transpose tr
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2992,7 +2992,7 @@ sycl::event gemm_bias(backend_selector<backend::portblas> selector, transpose tr
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -3004,7 +3004,7 @@ sycl::event gemm_bias(backend_selector<backend::portblas> selector, transpose tr
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
                       float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -3014,7 +3014,7 @@ sycl::event herk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::herk(
+    auto done = oneapi::math::blas::portblas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3023,7 +3023,7 @@ sycl::event herk(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::herk(
+    auto done = oneapi::math::blas::portblas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -3031,8 +3031,8 @@ sycl::event herk(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event ger(backend_selector<backend::portblas> selector, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3040,8 +3040,8 @@ sycl::event ger(backend_selector<backend::portblas> selector, std::int64_t m, st
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -3049,9 +3049,9 @@ sycl::event trsm(backend_selector<backend::portblas> selector, side left_right,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3059,9 +3059,9 @@ sycl::event trsm(backend_selector<backend::portblas> selector, side left_right,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3070,9 +3070,9 @@ sycl::event trsm(backend_selector<backend::portblas> selector, side left_right,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3081,9 +3081,9 @@ sycl::event trsm(backend_selector<backend::portblas> selector, side left_right,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3092,7 +3092,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3103,7 +3103,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3115,7 +3115,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3127,7 +3127,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side left_r
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3138,7 +3138,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3149,7 +3149,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3161,7 +3161,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3173,7 +3173,7 @@ sycl::event trsm_batch(backend_selector<backend::portblas> selector, side* left_
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3183,8 +3183,8 @@ sycl::event dotu(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                         result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                          result, dependencies);
     return done;
 }
 
@@ -3192,8 +3192,8 @@ sycl::event dotu(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                         result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                          result, dependencies);
     return done;
 }
 
@@ -3202,9 +3202,9 @@ sycl::event hemm(backend_selector<backend::portblas> selector, side left_right,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3213,9 +3213,9 @@ sycl::event hemm(backend_selector<backend::portblas> selector, side left_right,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3223,8 +3223,8 @@ sycl::event hpr2(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
-                                                         alpha, x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
+                                                          alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3232,8 +3232,8 @@ sycl::event hpr2(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
-                                                         alpha, x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
+                                                          alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3242,8 +3242,8 @@ sycl::event gbmv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
-                                                 a, lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                  a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3252,8 +3252,8 @@ sycl::event gbmv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
-                                                 a, lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                  a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3263,8 +3263,8 @@ sycl::event gbmv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
-                                                 a, lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                  a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3274,15 +3274,15 @@ sycl::event gbmv(backend_selector<backend::portblas> selector, transpose trans,
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
-                                                 a, lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                  a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3290,7 +3290,7 @@ sycl::event tbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event tbmv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3299,7 +3299,7 @@ sycl::event tbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3308,7 +3308,7 @@ sycl::event tbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3317,9 +3317,9 @@ sycl::event symm(backend_selector<backend::portblas> selector, side left_right,
                  std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3327,9 +3327,9 @@ sycl::event symm(backend_selector<backend::portblas> selector, side left_right,
                  std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3338,9 +3338,9 @@ sycl::event symm(backend_selector<backend::portblas> selector, side left_right,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3349,9 +3349,9 @@ sycl::event symm(backend_selector<backend::portblas> selector, side left_right,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
-                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
-                                                         beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                          upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                          beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3359,8 +3359,8 @@ sycl::event dotc(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                         result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                          result, dependencies);
     return done;
 }
 
@@ -3368,24 +3368,24 @@ sycl::event dotc(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                         result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                          result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, a, lda, dependencies);
     return done;
 }
 
@@ -3393,9 +3393,9 @@ sycl::event trmm(backend_selector<backend::portblas> selector, side left_right,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3403,9 +3403,9 @@ sycl::event trmm(backend_selector<backend::portblas> selector, side left_right,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3414,9 +3414,9 @@ sycl::event trmm(backend_selector<backend::portblas> selector, side left_right,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3425,39 +3425,39 @@ sycl::event trmm(backend_selector<backend::portblas> selector, side left_right,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                         upper_lower, trans, unit_diag, m, n, alpha,
-                                                         a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                          upper_lower, trans, unit_diag, m, n,
+                                                          alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::portblas> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
-                                                          param, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                           param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::portblas> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
-                                                          param, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                           param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3465,8 +3465,8 @@ sycl::event tpsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
@@ -3474,15 +3474,15 @@ sycl::event tpsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                         unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                          unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3490,7 +3490,7 @@ sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3499,7 +3499,7 @@ sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3508,7 +3508,7 @@ sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3516,32 +3516,32 @@ sycl::event trsv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event copy(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                          dependencies);
     return done;
 }
 
@@ -3549,7 +3549,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3558,7 +3558,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3567,7 +3567,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3576,7 +3576,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3585,7 +3585,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3594,7 +3594,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3603,7 +3603,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3612,7 +3612,7 @@ sycl::event copy_batch(backend_selector<backend::portblas> selector, std::int64_
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3622,7 +3622,7 @@ sycl::event hemv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3632,7 +3632,7 @@ sycl::event hemv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::hemv(
+    auto done = oneapi::math::blas::portblas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3641,9 +3641,9 @@ sycl::event gemmt(backend_selector<backend::portblas> selector, uplo upper_lower
                   transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a,
                   std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                          c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower,
+                                                           transa, transb, n, k, alpha, a, lda, b,
+                                                           ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3651,9 +3651,9 @@ sycl::event gemmt(backend_selector<backend::portblas> selector, uplo upper_lower
                   transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a,
                   std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                          c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower,
+                                                           transa, transb, n, k, alpha, a, lda, b,
+                                                           ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3662,9 +3662,9 @@ sycl::event gemmt(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                          c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower,
+                                                           transa, transb, n, k, alpha, a, lda, b,
+                                                           ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3673,9 +3673,9 @@ sycl::event gemmt(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                          c, ldc, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower,
+                                                           transa, transb, n, k, alpha, a, lda, b,
+                                                           ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3684,8 +3684,8 @@ sycl::event sbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                 lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                  lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3694,45 +3694,45 @@ sycl::event sbmv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                 lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                  lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::portblas> selector, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3740,7 +3740,7 @@ sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3749,7 +3749,7 @@ sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3758,7 +3758,7 @@ sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::portblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3766,78 +3766,78 @@ sycl::event tbsv(backend_selector<backend::portblas> selector, uplo upper_lower,
 sycl::event spr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
-                                                         alpha, x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
+                                                          alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::portblas> selector, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
-                                                         alpha, x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
+                                                          alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::portblas> selector, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::portblas> selector, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                           dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::portblas> selector, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                         param, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                          param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::portblas> selector, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                         param, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                          param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::portblas> selector, float* a, float* b, float* c,
                  float* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::portblas> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3845,7 +3845,7 @@ sycl::event rotg(backend_selector<backend::portblas> selector, std::complex<floa
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3853,15 +3853,15 @@ sycl::event rotg(backend_selector<backend::portblas> selector, std::complex<doub
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::portblas> selector, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                           incy, result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                            incy, result, dependencies);
     return done;
 }
 
@@ -3870,9 +3870,9 @@ sycl::event her2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
@@ -3881,33 +3881,33 @@ sycl::event her2k(backend_selector<backend::portblas> selector, uplo upper_lower
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
-                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                                                          dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
+                                                           n, k, alpha, a, lda, b, ldb, beta, c,
+                                                           ldc, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::portblas> selector, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::portblas> selector, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
@@ -3915,7 +3915,7 @@ sycl::event symv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symv(
+    auto done = oneapi::math::blas::portblas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3924,7 +3924,7 @@ sycl::event symv(backend_selector<backend::portblas> selector, uplo upper_lower,
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::symv(
+    auto done = oneapi::math::blas::portblas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3934,7 +3934,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3945,7 +3945,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3956,7 +3956,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3967,7 +3967,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3977,7 +3977,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3986,7 +3986,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3996,7 +3996,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -4006,7 +4006,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -4017,7 +4017,7 @@ sycl::event omatadd_batch(backend_selector<backend::portblas> selector, transpos
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4029,7 +4029,7 @@ sycl::event omatadd_batch(backend_selector<backend::portblas> selector, transpos
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4042,7 +4042,7 @@ sycl::event omatadd_batch(backend_selector<backend::portblas> selector, transpos
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4055,7 +4055,7 @@ sycl::event omatadd_batch(backend_selector<backend::portblas> selector, transpos
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -4064,16 +4064,16 @@ sycl::event omatadd_batch(backend_selector<backend::portblas> selector, transpos
 sycl::event omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4081,8 +4081,8 @@ sycl::event omatcopy(backend_selector<backend::portblas> selector, transpose tra
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4090,8 +4090,8 @@ sycl::event omatcopy(backend_selector<backend::portblas> selector, transpose tra
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -4099,7 +4099,7 @@ sycl::event omatcopy2(backend_selector<backend::portblas> selector, transpose tr
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4108,7 +4108,7 @@ sycl::event omatcopy2(backend_selector<backend::portblas> selector, transpose tr
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4118,7 +4118,7 @@ sycl::event omatcopy2(backend_selector<backend::portblas> selector, transpose tr
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4128,7 +4128,7 @@ sycl::event omatcopy2(backend_selector<backend::portblas> selector, transpose tr
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4136,16 +4136,16 @@ sycl::event omatcopy2(backend_selector<backend::portblas> selector, transpose tr
 sycl::event imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::portblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4153,8 +4153,8 @@ sycl::event imatcopy(backend_selector<backend::portblas> selector, transpose tra
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4162,8 +4162,8 @@ sycl::event imatcopy(backend_selector<backend::portblas> selector, transpose tra
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                             alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                              alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4171,9 +4171,9 @@ sycl::event omatadd(backend_selector<backend::portblas> selector, transpose tran
                     transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a,
                     std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                            dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb,
+                                                             m, n, alpha, a, lda, beta, b, ldb, c,
+                                                             ldc, dependencies);
     return done;
 }
 
@@ -4181,9 +4181,9 @@ sycl::event omatadd(backend_selector<backend::portblas> selector, transpose tran
                     transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a,
                     std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                            dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb,
+                                                             m, n, alpha, a, lda, beta, b, ldb, c,
+                                                             ldc, dependencies);
     return done;
 }
 
@@ -4192,9 +4192,9 @@ sycl::event omatadd(backend_selector<backend::portblas> selector, transpose tran
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                            dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb,
+                                                             m, n, alpha, a, lda, beta, b, ldb, c,
+                                                             ldc, dependencies);
     return done;
 }
 
@@ -4203,9 +4203,9 @@ sycl::event omatadd(backend_selector<backend::portblas> selector, transpose tran
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                            dependencies);
+    auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb,
+                                                             m, n, alpha, a, lda, beta, b, ldb, c,
+                                                             ldc, dependencies);
     return done;
 }
 
@@ -4214,7 +4214,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4225,7 +4225,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4236,7 +4236,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4247,7 +4247,7 @@ sycl::event omatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4257,7 +4257,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4267,7 +4267,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4278,7 +4278,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4289,7 +4289,7 @@ sycl::event imatcopy_batch(backend_selector<backend::portblas> selector, transpo
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
         dependencies);
     return done;
diff --git a/include/oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp b/include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp
similarity index 70%
rename from include/oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp
rename to include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp
index c8d47d742..52420dc85 100644
--- a/include/oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp
+++ b/include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_PORTBLAS_HPP_
-#define _ONEMKL_BLAS_PORTBLAS_HPP_
+#ifndef _ONEMATH_BLAS_PORTBLAS_HPP_
+#define _ONEMATH_BLAS_PORTBLAS_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -28,34 +28,34 @@
 
 #include <complex>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
-using oneapi::mkl::transpose;
-using oneapi::mkl::uplo;
-using oneapi::mkl::side;
-using oneapi::mkl::diag;
-using oneapi::mkl::offset;
+using oneapi::math::transpose;
+using oneapi::math::uplo;
+using oneapi::math::side;
+using oneapi::math::diag;
+using oneapi::math::offset;
 
 namespace blas {
 namespace portblas {
 namespace column_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace column_major
 namespace row_major {
 
-#include "oneapi/mkl/blas/detail/onemkl_blas_backends.hxx"
+#include "oneapi/math/blas/detail/onemath_blas_backends.hxx"
 
 } //namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_BLAS_PORTBLAS_HPP_
+#endif // _ONEMATH_BLAS_PORTBLAS_HPP_
diff --git a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hpp b/include/oneapi/math/blas/detail/rocblas/blas_ct.hpp
similarity index 84%
rename from include/oneapi/mkl/blas/detail/rocblas/blas_ct.hpp
rename to include/oneapi/math/blas/detail/rocblas/blas_ct.hpp
index 1a019b19e..73f2bc775 100644
--- a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hpp
+++ b/include/oneapi/math/blas/detail/rocblas/blas_ct.hpp
@@ -30,14 +30,14 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
-#include "oneapi/mkl/blas/detail/blas_ct_backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
+#include "oneapi/math/blas/detail/blas_ct_backends.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 
@@ -54,7 +54,7 @@ namespace row_major {
 
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
 #endif //_DETAIL_ROCBLAS_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx b/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx
similarity index 71%
rename from include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx
rename to include/oneapi/math/blas/detail/rocblas/blas_ct.hxx
index 7410315d2..42332ff59 100644
--- a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx
+++ b/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx
@@ -22,111 +22,113 @@
 void herk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda, float beta,
           sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void herk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           double beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
           sycl::buffer<float, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
           sycl::buffer<double, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void scal(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx);
 }
 
 void trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void spr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
          sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                            a);
 }
 
 void spr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
          sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                            a);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -134,9 +136,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<float, 1>& b, int64_t ldb,
                 int64_t stride_b, float beta, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -144,9 +146,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<double, 1>& b, int64_t ldb,
                 int64_t stride_b, double beta, sycl::buffer<double, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -155,9 +157,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, int64_t stride_b,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -166,9 +168,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, int64_t stride_b,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -176,9 +178,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<sycl::half, 1>& b, int64_t ldb,
                 int64_t stride_b, sycl::half beta, sycl::buffer<sycl::half, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -186,9 +188,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<sycl::half, 1>& b, int64_t ldb,
                 int64_t stride_b, float beta, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -196,9 +198,9 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<std::int8_t, 1>& b, int64_t ldb,
                 int64_t stride_b, float beta, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -206,57 +208,57 @@ void gemm_batch(backend_selector<backend::rocblas> selector, transpose transa, t
                 int64_t lda, int64_t stride_a, sycl::buffer<std::int8_t, 1>& b, int64_t ldb,
                 int64_t stride_b, float beta, sycl::buffer<std::int32_t, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
-                                                  alpha, a, lda, stride_a, b, ldb, stride_b, beta,
-                                                  c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k,
+                                                   alpha, a, lda, stride_a, b, ldb, stride_b, beta,
+                                                   c, ldc, stride_c, batch_size);
 }
 
 void syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda, float beta,
           sycl::buffer<float, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda, double beta,
           sycl::buffer<double, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           int64_t lda, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans, int64_t n,
           int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           int64_t lda, std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                            a, lda, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                             a, lda, beta, c, ldc);
 }
 
 void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                 int64_t n, int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda,
                 int64_t stride_a, float beta, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                   batch_size);
 }
 
 void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                 int64_t n, int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
                 int64_t stride_a, double beta, sycl::buffer<double, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                   batch_size);
 }
 
 void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
@@ -264,9 +266,9 @@ void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, t
                 sycl::buffer<std::complex<float>, 1>& a, int64_t lda, int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                   batch_size);
 }
 
 void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
@@ -274,190 +276,190 @@ void syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_lower, t
                 sycl::buffer<std::complex<double>, 1>& a, int64_t lda, int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
-                                                  alpha, a, lda, stride_a, beta, c, ldc, stride_c,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k,
+                                                   alpha, a, lda, stride_a, beta, c, ldc, stride_c,
+                                                   batch_size);
 }
 
 void her2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a, lda);
+    oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a, lda);
 }
 
 void her2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a, lda);
+    oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a, lda);
 }
 
 void hbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, int64_t k,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                            x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                             x, incx, beta, y, incy);
 }
 
 void hbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, int64_t k,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                            x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                             x, incx, beta, y, incy);
 }
 
 void rot(backend_selector<backend::rocblas> selector, int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, int64_t incy, float c, float s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::rocblas> selector, int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, int64_t incy, double c, double s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
          int64_t incx, sycl::buffer<float, 1>& y, int64_t incy, float c, float s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void rot(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
          int64_t incx, sycl::buffer<double, 1>& y, int64_t incy, double c, double s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s);
 }
 
 void axpy(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
           sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
           sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy);
 }
 
 void axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, int64_t incx, int64_t stridex, sycl::buffer<float, 1>& y,
                 int64_t incy, int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                  y, incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                   y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<double, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                  y, incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                   y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                  y, incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                   y, incy, stridey, batch_size);
 }
 
 void axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
-                                                  y, incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex,
+                                                   y, incy, stridey, batch_size);
 }
 
 void axpby(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
            sycl::buffer<float, 1>& x, int64_t incx, float beta, sycl::buffer<float, 1>& y,
            int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                             incy);
+    oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                              incy);
 }
 
 void axpby(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
            sycl::buffer<double, 1>& x, int64_t incx, double beta, sycl::buffer<double, 1>& y,
            int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                             incy);
+    oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                              incy);
 }
 
 void axpby(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                             incy);
+    oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                              incy);
 }
 
 void axpby(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
-                                             incy);
+    oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y,
+                                              incy);
 }
 
 void sdsdot(backend_selector<backend::rocblas> selector, int64_t n, float sb,
             sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& y, int64_t incy,
             sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
-                                              result);
+    oneapi::math::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy,
+                                               result);
 }
 
 void gerc(backend_selector<backend::rocblas> selector, int64_t m, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void gerc(backend_selector<backend::rocblas> selector, int64_t m, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
            int64_t n, int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda,
            sycl::buffer<float, 1>& b, int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
            int64_t n, int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
            sycl::buffer<double, 1>& b, int64_t ldb, double beta, sycl::buffer<double, 1>& c,
            int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
            int64_t n, int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
            int64_t lda, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
            std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
@@ -465,56 +467,56 @@ void syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transp
            sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           float alpha, sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& x,
           int64_t incx, float beta, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           double alpha, sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& x,
           int64_t incx, double beta, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void gemv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
                 float alpha, sycl::buffer<float, 1>& a, int64_t lda, int64_t stridea,
                 sycl::buffer<float, 1>& x, int64_t incx, int64_t stridex, float beta,
                 sycl::buffer<float, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, x, incx, stridex, beta, y, incy,
+                                                   stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
                 double alpha, sycl::buffer<double, 1>& a, int64_t lda, int64_t stridea,
                 sycl::buffer<double, 1>& x, int64_t incx, int64_t stridex, double beta,
                 sycl::buffer<double, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, x, incx, stridex, beta, y, incy,
+                                                   stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
@@ -522,9 +524,9 @@ void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, in
                 int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
                 int64_t stridex, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
                 int64_t incy, int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, x, incx, stridex, beta, y, incy,
+                                                   stridey, batch_size);
 }
 
 void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
@@ -533,27 +535,27 @@ void gemv_batch(backend_selector<backend::rocblas> selector, transpose trans, in
                 int64_t stridex, std::complex<double> beta,
                 sycl::buffer<std::complex<double>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                  stridea, x, incx, stridex, beta, y, incy, stridey,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                   stridea, x, incx, stridex, beta, y, incy,
+                                                   stridey, batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, int64_t m, int64_t n,
                 sycl::buffer<float, 1>& a, int64_t lda, int64_t stridea, sycl::buffer<float, 1>& x,
                 int64_t incx, int64_t stridex, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stridec, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                  stridea, x, incx, stridex, c, ldc, stridec,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                   stridea, x, incx, stridex, c, ldc, stridec,
+                                                   batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, int64_t m, int64_t n,
                 sycl::buffer<double, 1>& a, int64_t lda, int64_t stridea,
                 sycl::buffer<double, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<double, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                  stridea, x, incx, stridex, c, ldc, stridec,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                   stridea, x, incx, stridex, c, ldc, stridec,
+                                                   batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, int64_t m, int64_t n,
@@ -561,9 +563,9 @@ void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, in
                 sycl::buffer<std::complex<float>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& c, int64_t ldc, int64_t stridec,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                  stridea, x, incx, stridex, c, ldc, stridec,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                   stridea, x, incx, stridex, c, ldc, stridec,
+                                                   batch_size);
 }
 
 void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, int64_t m, int64_t n,
@@ -571,87 +573,89 @@ void dgmm_batch(backend_selector<backend::rocblas> selector, side left_right, in
                 sycl::buffer<std::complex<double>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stridec,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
-                                                  stridea, x, incx, stridex, c, ldc, stridec,
-                                                  batch_size);
+    oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda,
+                                                   stridea, x, incx, stridex, c, ldc, stridec,
+                                                   batch_size);
 }
 
 void her(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                            lda);
 }
 
 void her(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                            lda);
 }
 
 void hpr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                            a);
 }
 
 void hpr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a);
+    oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                            a);
 }
 
 void iamin(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
            int64_t incx, sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
            int64_t incx, sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::rocblas> selector, int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
            sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void iamin(backend_selector<backend::rocblas> selector, int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
            sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result);
 }
 
 void hpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                            beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                             incx, beta, y, incy);
 }
 
 void hpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                            beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                             incx, beta, y, incy);
 }
 
 void spmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, int64_t incx, float beta,
           sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                            beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                             incx, beta, y, incy);
 }
 
 void spmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
           sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, int64_t incx, double beta,
           sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx,
-                                            beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x,
+                                             incx, beta, y, incy);
 }
 
 void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -659,9 +663,9 @@ void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, tr
                sycl::buffer<int8_t, 1>& a, int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, int64_t ldc,
                sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                 co);
+    oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                  co);
 }
 
 void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -669,9 +673,9 @@ void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, tr
                sycl::buffer<int8_t, 1>& a, int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, int64_t ldc,
                sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                 co);
+    oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                  co);
 }
 
 void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -679,9 +683,9 @@ void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, tr
                sycl::buffer<uint8_t, 1>& a, int64_t lda, uint8_t ao, sycl::buffer<int8_t, 1>& b,
                int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, int64_t ldc,
                sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                 co);
+    oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                  co);
 }
 
 void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -689,83 +693,83 @@ void gemm_bias(backend_selector<backend::rocblas> selector, transpose transa, tr
                sycl::buffer<uint8_t, 1>& a, int64_t lda, uint8_t ao, sycl::buffer<uint8_t, 1>& b,
                int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, int64_t ldc,
                sycl::buffer<int32_t, 1>& co) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
-                                                 n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
-                                                 co);
+    oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m,
+                                                  n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+                                                  co);
 }
 
 void swap(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
           int64_t incx, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
           int64_t incx, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void swap(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void geru(backend_selector<backend::rocblas> selector, int64_t m, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void geru(backend_selector<backend::rocblas> selector, int64_t m, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                            lda);
+    oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                             lda);
 }
 
 void nrm2(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
           int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void nrm2(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
           int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
           int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& b, int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
           int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& b, int64_t ldb, double beta, sycl::buffer<double, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -773,8 +777,8 @@ void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpo
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -782,152 +786,152 @@ void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpo
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
           int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
           int64_t lda, sycl::buffer<sycl::half, 1>& b, int64_t ldb, sycl::half beta,
           sycl::buffer<sycl::half, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
           int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a, int64_t lda,
           sycl::buffer<sycl::half, 1>& b, int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemm(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
           int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a, int64_t lda,
           sycl::buffer<bfloat16, 1>& b, int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a,
-                                            lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha,
+                                             a, lda, b, ldb, beta, c, ldc);
 }
 
 void syr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
           sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& y, int64_t incy,
           sycl::buffer<float, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a, lda);
+    oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a, lda);
 }
 
 void syr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
           sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& y, int64_t incy,
           sycl::buffer<double, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a, lda);
+    oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a, lda);
 }
 
 void ger(backend_selector<backend::rocblas> selector, int64_t m, int64_t n, float alpha,
          sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& y, int64_t incy,
          sycl::buffer<float, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void ger(backend_selector<backend::rocblas> selector, int64_t m, int64_t n, double alpha,
          sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& y, int64_t incy,
          sycl::buffer<double, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a,
+                                            lda);
 }
 
 void trsm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha,
           sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha,
           sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trsm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void dotu(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotu(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void hemm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hemm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<double>, 1>& b, int64_t ldb,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void hpr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a);
+    oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a);
 }
 
 void hpr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a);
+    oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a);
 }
 
 void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           int64_t kl, int64_t ku, float alpha, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& x, int64_t incx, float beta, sycl::buffer<float, 1>& y,
           int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                            lda, x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                             lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
           int64_t kl, int64_t ku, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& x, int64_t incx, double beta, sycl::buffer<double, 1>& y,
           int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                            lda, x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                             lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
@@ -935,8 +939,8 @@ void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                            lda, x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                             lda, x, incx, beta, y, incy);
 }
 
 void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
@@ -944,275 +948,275 @@ void gbmv(backend_selector<backend::rocblas> selector, transpose trans, int64_t
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                            lda, x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
+                                             lda, x, incx, beta, y, incy);
 }
 
 void tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void symm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, float alpha, sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& b,
           int64_t ldb, float beta, sycl::buffer<float, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& b, int64_t ldb, double beta, sycl::buffer<double, 1>& c,
           int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void symm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower, int64_t m,
           int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<double>, 1>& b, int64_t ldb,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                            alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
+                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void dotc(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dotc(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void syr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
          sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                            lda);
 }
 
 void syr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
          sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& a, int64_t lda) {
-    oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
-                                           lda);
+    oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a,
+                                            lda);
 }
 
 void trmm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha,
           sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha,
           sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void trmm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
-                                            unit_diag, m, n, alpha, a, lda, b, ldb);
+    oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans,
+                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
 void rotmg(backend_selector<backend::rocblas> selector, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void rotmg(backend_selector<backend::rocblas> selector, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
+    oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param);
 }
 
 void tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, x, incx);
 }
 
 void trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             a, lda, x, incx);
 }
 
 void copy(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
           int64_t incx, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
           int64_t incx, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy);
 }
 
 void copy_batch(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
                 int64_t incx, int64_t stridex, sycl::buffer<float, 1>& y, int64_t incy,
                 int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                  incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                   incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
                 int64_t incx, int64_t stridex, sycl::buffer<double, 1>& y, int64_t incy,
                 int64_t stridey, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                  incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                   incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                  incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                   incy, stridey, batch_size);
 }
 
 void copy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
-                                                  incy, stridey, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y,
+                                                   incy, stridey, batch_size);
 }
 
 void hemv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void hemv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose transa,
            transpose transb, int64_t n, int64_t k, float alpha, sycl::buffer<float, 1>& a,
            int64_t lda, sycl::buffer<float, 1>& b, int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose transa,
            transpose transb, int64_t n, int64_t k, double alpha, sycl::buffer<double, 1>& a,
            int64_t lda, sycl::buffer<double, 1>& b, int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose transa,
@@ -1220,8 +1224,8 @@ void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transp
            sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose transa,
@@ -1229,149 +1233,149 @@ void gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower, transp
            sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
-                                             k, alpha, a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n,
+                                              k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
 void asum(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::rocblas> selector, int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
           int64_t incx, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void asum(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
           int64_t incx, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result);
 }
 
 void sbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, int64_t k,
           float alpha, sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& x,
           int64_t incx, float beta, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                            x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                             x, incx, beta, y, incy);
 }
 
 void sbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, int64_t k,
           double alpha, sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& x,
           int64_t incx, double beta, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
-                                            x, incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda,
+                                             x, incx, beta, y, incy);
 }
 
 void tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<float, 1>& a, int64_t lda,
           sycl::buffer<float, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<double, 1>& a, int64_t lda,
           sycl::buffer<double, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<float>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
           diag unit_diag, int64_t n, int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           int64_t lda, sycl::buffer<std::complex<double>, 1>& x, int64_t incx) {
-    oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
-                                            k, a, lda, x, incx);
+    oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n,
+                                             k, a, lda, x, incx);
 }
 
 void spr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
           sycl::buffer<float, 1>& x, int64_t incx, sycl::buffer<float, 1>& y, int64_t incy,
           sycl::buffer<float, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a);
+    oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a);
 }
 
 void spr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
           sycl::buffer<double, 1>& x, int64_t incx, sycl::buffer<double, 1>& y, int64_t incy,
           sycl::buffer<double, 1>& a) {
-    oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y,
-                                            incy, a);
+    oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx,
+                                             y, incy, a);
 }
 
 void iamax(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
            int64_t incx, sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
            int64_t incx, sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::rocblas> selector, int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
            sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void iamax(backend_selector<backend::rocblas> selector, int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, int64_t incx,
            sycl::buffer<int64_t, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
+    oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result);
 }
 
 void rotm(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
           int64_t incx, sycl::buffer<float, 1>& y, int64_t incy, sycl::buffer<float, 1>& param) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void rotm(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
           int64_t incx, sycl::buffer<double, 1>& y, int64_t incy, sycl::buffer<double, 1>& param) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
+    oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param);
 }
 
 void dot(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
          int64_t incx, sycl::buffer<float, 1>& y, int64_t incy, sycl::buffer<float, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<double, 1>& x,
          int64_t incx, sycl::buffer<double, 1>& y, int64_t incy, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void dot(backend_selector<backend::rocblas> selector, int64_t n, sycl::buffer<float, 1>& x,
          int64_t incx, sycl::buffer<float, 1>& y, int64_t incy, sycl::buffer<double, 1>& result) {
-    oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
+    oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result);
 }
 
 void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha,
                 sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a, sycl::buffer<float, 1>& b,
                 int64_t ldb, int64_t stride_b, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                  b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                   b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha,
                 sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<double, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                  b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                   b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
@@ -1379,9 +1383,9 @@ void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, up
                 sycl::buffer<std::complex<float>, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, int64_t stride_b,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                  b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                   b, ldb, stride_b, batch_size);
 }
 
 void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
@@ -1389,17 +1393,17 @@ void trsm_batch(backend_selector<backend::rocblas> selector, side left_right, up
                 sycl::buffer<std::complex<double>, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, int64_t stride_b,
                 int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
-                                                  trans, unit_diag, m, n, alpha, a, lda, stride_a,
-                                                  b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower,
+                                                   trans, unit_diag, m, n, alpha, a, lda, stride_a,
+                                                   b, ldb, stride_b, batch_size);
 }
 
 void her2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
            int64_t n, int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
            int64_t lda, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void her2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
@@ -1407,60 +1411,60 @@ void her2k(backend_selector<backend::rocblas> selector, uplo upper_lower, transp
            sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
-                                             a, lda, b, ldb, beta, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha,
+                                              a, lda, b, ldb, beta, c, ldc);
 }
 
 void rotg(backend_selector<backend::rocblas> selector, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::rocblas> selector, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::rocblas> selector, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void rotg(backend_selector<backend::rocblas> selector, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
+    oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s);
 }
 
 void symv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, float alpha,
           sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& x, int64_t incx,
           float beta, sycl::buffer<float, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void symv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n, double alpha,
           sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& x, int64_t incx,
           double beta, sycl::buffer<double, 1>& y, int64_t incy) {
-    oneapi::mkl::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
-                                            incx, beta, y, incy);
+    oneapi::math::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x,
+                                             incx, beta, y, incy);
 }
 
 void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                      lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                       lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                      lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                       lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
@@ -1468,8 +1472,8 @@ void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                      lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                       lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
@@ -1477,38 +1481,38 @@ void omatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
-                                                      lda, stride_a, b, ldb, stride_b, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a,
+                                                       lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                      lda, ldb, stride, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                       lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                      lda, ldb, stride, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                       lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                      lda, ldb, stride, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                       lda, ldb, stride, batch_size);
 }
 
 void imatcopy_batch(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
-                                                      lda, ldb, stride, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab,
+                                                       lda, ldb, stride, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1516,9 +1520,9 @@ void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa
                    std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                     alpha, a, lda, stride_a, beta, b, ldb,
-                                                     stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                      alpha, a, lda, stride_a, beta, b, ldb,
+                                                      stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1526,9 +1530,9 @@ void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa
                    std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                     alpha, a, lda, stride_a, beta, b, ldb,
-                                                     stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                      alpha, a, lda, stride_a, beta, b, ldb,
+                                                      stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1537,9 +1541,9 @@ void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
                    std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                     alpha, a, lda, stride_a, beta, b, ldb,
-                                                     stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                      alpha, a, lda, stride_a, beta, b, ldb,
+                                                      stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1549,113 +1553,113 @@ void omatadd_batch(backend_selector<backend::rocblas> selector, transpose transa
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
-                                                     alpha, a, lda, stride_a, beta, b, ldb,
-                                                     stride_b, c, ldc, stride_c, batch_size);
+    oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n,
+                                                      alpha, a, lda, stride_a, beta, b, ldb,
+                                                      stride_b, c, ldc, stride_c, batch_size);
 }
 
 void omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 b, ldb);
 }
 
 void omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 b, ldb);
 }
 
 void omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 b, ldb);
 }
 
 void omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                 b, ldb);
 }
 
 void omatcopy2(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, b, ldb, strideb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, b, ldb, strideb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, b, ldb, strideb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, b, ldb, strideb);
 }
 
 void omatcopy2(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
-                                                 stridea, b, ldb, strideb);
+    oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda,
+                                                  stridea, b, ldb, strideb);
 }
 
 void imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                 ldb);
 }
 
 void imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                 ldb);
 }
 
 void imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                 ldb);
 }
 
 void imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
-    oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
-                                                ldb);
+    oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda,
+                                                 ldb);
 }
 
 void omatadd(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                               lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                               lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1663,8 +1667,8 @@ void omatadd(backend_selector<backend::rocblas> selector, transpose transa, tran
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                               lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                a, lda, beta, b, ldb, c, ldc);
 }
 
 void omatadd(backend_selector<backend::rocblas> selector, transpose transa, transpose transb,
@@ -1672,8 +1676,8 @@ void omatadd(backend_selector<backend::rocblas> selector, transpose transa, tran
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a,
-                                               lda, beta, b, ldb, c, ldc);
+    oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha,
+                                                a, lda, beta, b, ldb, c, ldc);
 }
 
 // USM APIs
@@ -1681,69 +1685,69 @@ void omatadd(backend_selector<backend::rocblas> selector, transpose transa, tran
 sycl::event syr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a,
                  int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2(
+        selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  double alpha, const double* x, int64_t incx, const double* y, int64_t incy,
                  double* a, int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2(
+        selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, float alpha, float* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, double alpha, double* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event scal(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx,
+                                                         dependencies);
     return done;
 }
 
 sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1751,7 +1755,7 @@ sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1760,7 +1764,7 @@ sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, const std::complex<float>* a, int64_t lda,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1769,7 +1773,7 @@ sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, const std::complex<double>* a, int64_t lda,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -1777,48 +1781,48 @@ sycl::event trmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const float* a, float* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const double* a, double* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const std::complex<float>* a, std::complex<float>* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const std::complex<double>* a, std::complex<double>* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 float alpha, const float* x, int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, dependencies);
     return done;
 }
 
 sycl::event spr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 double alpha, const double* x, int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, dependencies);
     return done;
 }
 
@@ -1827,8 +1831,8 @@ sycl::event hpmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  const std::complex<float>* x, int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                        a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpmv(
+        selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1837,15 +1841,15 @@ sycl::event hpmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  const std::complex<double>* x, int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha,
-                                                        a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpmv(
+        selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  int64_t n, int64_t k, float alpha, const float* a, int64_t lda, float beta,
                  float* c, int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1853,7 +1857,7 @@ sycl::event syrk(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event syrk(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  int64_t n, int64_t k, double alpha, const double* a, int64_t lda, double beta,
                  double* c, int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1862,7 +1866,7 @@ sycl::event syrk(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t n, int64_t k, std::complex<float> alpha, const std::complex<float>* a,
                  int64_t lda, std::complex<float> beta, std::complex<float>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1871,7 +1875,7 @@ sycl::event syrk(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t n, int64_t k, std::complex<double> alpha, const std::complex<double>* a,
                  int64_t lda, std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -1880,7 +1884,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo* upper_
                        transpose* trans, int64_t* n, int64_t* k, float* alpha, const float** a,
                        int64_t* lda, float* beta, float** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1890,7 +1894,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo* upper_
                        transpose* trans, int64_t* n, int64_t* k, double* alpha, const double** a,
                        int64_t* lda, double* beta, double** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1901,7 +1905,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo* upper_
                        const std::complex<float>** a, int64_t* lda, std::complex<float>* beta,
                        std::complex<float>** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1912,7 +1916,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo* upper_
                        const std::complex<double>** a, int64_t* lda, std::complex<double>* beta,
                        std::complex<double>** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count,
         group_size, dependencies);
     return done;
@@ -1923,7 +1927,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_l
                        int64_t lda, int64_t stride_a, float beta, float* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1934,7 +1938,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_l
                        int64_t lda, int64_t stride_a, double beta, double* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1946,7 +1950,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_l
                        std::complex<float> beta, std::complex<float>* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1958,7 +1962,7 @@ sycl::event syrk_batch(backend_selector<backend::rocblas> selector, uplo upper_l
                        std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
         stride_c, batch_size, dependencies);
     return done;
@@ -1968,8 +1972,8 @@ sycl::event her2(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<float> alpha, const std::complex<float>* x, int64_t incx,
                  const std::complex<float>* y, int64_t incy, std::complex<float>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her2(
+        selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1977,8 +1981,8 @@ sycl::event her2(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                  const std::complex<double>* y, int64_t incy, std::complex<double>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her2(
+        selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -1988,8 +1992,8 @@ sycl::event hbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                 lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -1999,72 +2003,72 @@ sycl::event hbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                 lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float>* x,
                 int64_t incx, std::complex<float>* y, int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                       s, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                        c, s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double>* x,
                 int64_t incx, std::complex<double>* y, int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                       s, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                        c, s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::rocblas> selector, int64_t n, float* x, int64_t incx,
                 float* y, int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                       s, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                        c, s, dependencies);
     return done;
 }
 
 sycl::event rot(backend_selector<backend::rocblas> selector, int64_t n, double* x, int64_t incx,
                 double* y, int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c,
-                                                       s, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy,
+                                                        c, s, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
                  const float* x, int64_t incx, float* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                        incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                         incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
                  const double* x, int64_t incx, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                        incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                         incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                        incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                         incy, dependencies);
     return done;
 }
 
 sycl::event axpy(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
-                                                        incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y,
+                                                         incy, dependencies);
     return done;
 }
 
@@ -2072,7 +2076,7 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        const float** x, int64_t* incx, float** y, int64_t* incy,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2081,7 +2085,7 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        const double** x, int64_t* incx, double** y, int64_t* incy,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2090,7 +2094,7 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        std::complex<float>* alpha, const std::complex<float>** x, int64_t* incx,
                        std::complex<float>** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2099,7 +2103,7 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        std::complex<double>* alpha, const std::complex<double>** x, int64_t* incx,
                        std::complex<double>** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(
         selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -2108,9 +2112,9 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, f
                        const float* x, int64_t incx, int64_t stridex, float* y, int64_t incy,
                        int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                              incx, stridex, y, incy, stridey,
-                                                              batch_size, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                               incx, stridex, y, incy, stridey,
+                                                               batch_size, dependencies);
     return done;
 }
 
@@ -2118,9 +2122,9 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t n, d
                        const double* x, int64_t incx, int64_t stridex, double* y, int64_t incy,
                        int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                              incx, stridex, y, incy, stridey,
-                                                              batch_size, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                               incx, stridex, y, incy, stridey,
+                                                               batch_size, dependencies);
     return done;
 }
 
@@ -2128,9 +2132,9 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                        std::complex<float> alpha, const std::complex<float>* x, int64_t incx,
                        int64_t stridex, std::complex<float>* y, int64_t incy, int64_t stridey,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                              incx, stridex, y, incy, stridey,
-                                                              batch_size, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                               incx, stridex, y, incy, stridey,
+                                                               batch_size, dependencies);
     return done;
 }
 
@@ -2138,25 +2142,25 @@ sycl::event axpy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                        std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                        int64_t stridex, std::complex<double>* y, int64_t incy, int64_t stridey,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
-                                                              incx, stridex, y, incy, stridey,
-                                                              batch_size, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x,
+                                                               incx, stridex, y, incy, stridey,
+                                                               batch_size, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::rocblas> selector, int64_t n, float alpha,
                   const float* x, int64_t incx, const float beta, float* y, int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                         beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                          beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event axpby(backend_selector<backend::rocblas> selector, int64_t n, double alpha,
                   const double* x, int64_t incx, const double beta, double* y, int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                         beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                          beta, y, incy, dependencies);
     return done;
 }
 
@@ -2164,8 +2168,8 @@ sycl::event axpby(backend_selector<backend::rocblas> selector, int64_t n, std::c
                   const std::complex<float>* x, int64_t incx, const std::complex<float> beta,
                   std::complex<float>* y, int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                         beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                          beta, y, incy, dependencies);
     return done;
 }
 
@@ -2173,8 +2177,8 @@ sycl::event axpby(backend_selector<backend::rocblas> selector, int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
-                                                         beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx,
+                                                          beta, y, incy, dependencies);
     return done;
 }
 
@@ -2182,8 +2186,8 @@ sycl::event gerc(backend_selector<backend::rocblas> selector, int64_t m, int64_t
                  std::complex<float> alpha, const std::complex<float>* x, int64_t incx,
                  const std::complex<float>* y, int64_t incy, std::complex<float>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2191,8 +2195,8 @@ sycl::event gerc(backend_selector<backend::rocblas> selector, int64_t m, int64_t
                  std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                  const std::complex<double>* y, int64_t incy, std::complex<double>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2200,9 +2204,9 @@ sycl::event syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b,
                   int64_t ldb, float beta, float* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2210,9 +2214,9 @@ sycl::event syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b,
                   int64_t ldb, double beta, double* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2221,9 +2225,9 @@ sycl::event syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t lda, const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
                   std::complex<float>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -2232,17 +2236,17 @@ sycl::event syr2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t lda, const std::complex<double>* b, int64_t ldb,
                   std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
 sycl::event gemv(backend_selector<backend::rocblas> selector, transpose trans, int64_t m, int64_t n,
                  float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta,
                  float* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                        lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2250,8 +2254,8 @@ sycl::event gemv(backend_selector<backend::rocblas> selector, transpose trans, i
                  double alpha, const double* a, int64_t lda, const double* x, int64_t incx,
                  double beta, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                        lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2260,8 +2264,8 @@ sycl::event gemv(backend_selector<backend::rocblas> selector, transpose trans, i
                  const std::complex<float>* x, int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                        lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2270,8 +2274,8 @@ sycl::event gemv(backend_selector<backend::rocblas> selector, transpose trans, i
                  const std::complex<double>* x, int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a,
-                                                        lda, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv(
+        selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -2280,7 +2284,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose tr
                        const float* x, int64_t incx, int64_t stridex, float beta, float* y,
                        int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2291,7 +2295,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose tr
                        const double* x, int64_t incx, int64_t stridex, double beta, double* y,
                        int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2303,7 +2307,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t stridex, std::complex<float> beta, std::complex<float>* y,
                        int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2315,7 +2319,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t stridex, std::complex<double> beta, std::complex<double>* y,
                        int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy,
         stridey, batch_size, dependencies);
     return done;
@@ -2325,7 +2329,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose* t
                        int64_t* n, float* alpha, const float** a, int64_t* lda, const float** x,
                        int64_t* incx, float* beta, float** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2335,7 +2339,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose* t
                        int64_t* n, double* alpha, const double** a, int64_t* lda, const double** x,
                        int64_t* incx, double* beta, double** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2347,7 +2351,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose* t
                        std::complex<float>* beta, std::complex<float>** y, int64_t* incy,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2359,7 +2363,7 @@ sycl::event gemv_batch(backend_selector<backend::rocblas> selector, transpose* t
                        std::complex<double>* beta, std::complex<double>** y, int64_t* incy,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count,
         group_size, dependencies);
     return done;
@@ -2369,7 +2373,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        int64_t n, const float* a, int64_t lda, int64_t stridea, const float* x,
                        int64_t incx, int64_t stridex, float* c, int64_t ldc, int64_t stridec,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2379,7 +2383,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        int64_t n, const double* a, int64_t lda, int64_t stridea, const double* x,
                        int64_t incx, int64_t stridex, double* c, int64_t ldc, int64_t stridec,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2390,7 +2394,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        const std::complex<float>* x, int64_t incx, int64_t stridex,
                        std::complex<float>* c, int64_t ldc, int64_t stridec, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2401,7 +2405,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        const std::complex<double>* x, int64_t incx, int64_t stridex,
                        std::complex<double>* c, int64_t ldc, int64_t stridec, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec,
         batch_size, dependencies);
     return done;
@@ -2411,7 +2415,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        int64_t* n, const float** a, int64_t* lda, const float** x, int64_t* incx,
                        float** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2421,7 +2425,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        int64_t* n, const double** a, int64_t* lda, const double** x, int64_t* incx,
                        double** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2432,7 +2436,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        const std::complex<float>** x, int64_t* incx, std::complex<float>** c,
                        int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2443,7 +2447,7 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        const std::complex<double>** x, int64_t* incx, std::complex<double>** c,
                        int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch(
         selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size,
         dependencies);
     return done;
@@ -2452,62 +2456,62 @@ sycl::event dgmm_batch(backend_selector<backend::rocblas> selector, side* left_r
 sycl::event her(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 float alpha, const std::complex<float>* x, int64_t incx, std::complex<float>* a,
                 int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event her(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 double alpha, const std::complex<double>* x, int64_t incx, std::complex<double>* a,
                 int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 float alpha, const std::complex<float>* x, int64_t incx, std::complex<float>* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, dependencies);
     return done;
 }
 
 sycl::event hpr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 double alpha, const std::complex<double>* x, int64_t incx, std::complex<double>* a,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                   int64_t incx, int64_t* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                   int64_t incx, int64_t* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::rocblas> selector, int64_t n,
                   const std::complex<float>* x, int64_t incx, int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamin(backend_selector<backend::rocblas> selector, int64_t n,
                   const std::complex<double>* x, int64_t incx, int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
@@ -2516,7 +2520,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const float** a, int64_t* lda, const float** b, int64_t* ldb, float* beta,
                        float** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2527,7 +2531,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const double** a, int64_t* lda, const double** b, int64_t* ldb, double* beta,
                        double** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2539,7 +2543,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const std::complex<float>** b, int64_t* ldb, std::complex<float>* beta,
                        std::complex<float>** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2551,7 +2555,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const std::complex<double>** b, int64_t* ldb, std::complex<double>* beta,
                        std::complex<double>** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2562,7 +2566,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb,
                        sycl::half* beta, sycl::half** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2573,7 +2577,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb,
                        float* beta, float** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2584,7 +2588,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb,
                        float* beta, float** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2595,7 +2599,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose* t
                        const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb,
                        float* beta, std::int32_t** c, int64_t* ldc, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
         group_count, group_size, dependencies);
     return done;
@@ -2606,7 +2610,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        const float* a, int64_t lda, int64_t stride_a, const float* b, int64_t ldb,
                        int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2617,7 +2621,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        const double* a, int64_t lda, int64_t stride_a, const double* b, int64_t ldb,
                        int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2630,7 +2634,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        std::complex<float> beta, std::complex<float>* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2643,7 +2647,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2655,7 +2659,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t ldb, int64_t stride_b, sycl::half beta, sycl::half* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2667,7 +2671,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2679,7 +2683,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2691,7 +2695,7 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
                        int64_t ldb, int64_t stride_b, float beta, std::int32_t* c, int64_t ldc,
                        int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch(
         selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b,
         beta, c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -2700,46 +2704,46 @@ sycl::event gemm_batch(backend_selector<backend::rocblas> selector, transpose tr
 sycl::event spmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  float alpha, const float* a, const float* x, int64_t incx, float beta, float* y,
                  int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                        a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spmv(
+        selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event spmv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  double alpha, const double* a, const double* x, int64_t incx, double beta,
                  double* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha,
-                                                        a, x, incx, beta, y, incy, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spmv(
+        selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::rocblas> selector, int64_t n, float* x, int64_t incx,
                  float* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::rocblas> selector, int64_t n, double* x, int64_t incx,
                  double* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::rocblas> selector, int64_t n, std::complex<float>* x,
                  int64_t incx, std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event swap(backend_selector<backend::rocblas> selector, int64_t n, std::complex<double>* x,
                  int64_t incx, std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
@@ -2747,8 +2751,8 @@ sycl::event geru(backend_selector<backend::rocblas> selector, int64_t m, int64_t
                  std::complex<float> alpha, const std::complex<float>* x, int64_t incx,
                  const std::complex<float>* y, int64_t incy, std::complex<float>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
@@ -2756,38 +2760,38 @@ sycl::event geru(backend_selector<backend::rocblas> selector, int64_t m, int64_t
                  std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                  const std::complex<double>* y, int64_t incy, std::complex<double>* a, int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
-                                                        y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx,
+                                                         y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<float>* x, int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<double>* x, int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                  int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event nrm2(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                  int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
@@ -2796,8 +2800,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  const float* b, int64_t ldb, float beta, float* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2806,8 +2810,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  const double* b, int64_t ldb, double beta, double* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2817,8 +2821,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  int64_t ldb, std::complex<float> beta, std::complex<float>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2828,8 +2832,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  int64_t ldb, std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2838,8 +2842,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  int64_t lda, const sycl::half* b, int64_t ldb, sycl::half beta, sycl::half* c,
                  int64_t ldc, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2848,8 +2852,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  const sycl::half* b, int64_t ldb, float beta, float* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2858,8 +2862,8 @@ sycl::event gemm(backend_selector<backend::rocblas> selector, transpose transa,
                  const bfloat16* b, int64_t ldb, float beta, float* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k,
+                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
     return done;
 }
 
@@ -2869,7 +2873,7 @@ sycl::event gemm_bias(backend_selector<backend::rocblas> selector, transpose tra
                       const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta,
                       std::int32_t* c, int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2881,7 +2885,7 @@ sycl::event gemm_bias(backend_selector<backend::rocblas> selector, transpose tra
                       const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta,
                       std::int32_t* c, int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2893,7 +2897,7 @@ sycl::event gemm_bias(backend_selector<backend::rocblas> selector, transpose tra
                       const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta,
                       std::int32_t* c, int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2905,7 +2909,7 @@ sycl::event gemm_bias(backend_selector<backend::rocblas> selector, transpose tra
                       const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta,
                       std::int32_t* c, int64_t ldc, const std::int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias(
         selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta,
         c, ldc, co, dependencies);
     return done;
@@ -2915,7 +2919,7 @@ sycl::event herk(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t n, int64_t k, float alpha, const std::complex<float>* a, int64_t lda,
                  float beta, std::complex<float>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::herk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -2924,7 +2928,7 @@ sycl::event herk(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t n, int64_t k, double alpha, const std::complex<double>* a, int64_t lda,
                  double beta, std::complex<double>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::herk(
+    auto done = oneapi::math::blas::rocblas::MAJOR::herk(
         selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
     return done;
 }
@@ -2932,25 +2936,25 @@ sycl::event herk(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event ger(backend_selector<backend::rocblas> selector, int64_t m, int64_t n, float alpha,
                 const float* x, int64_t incx, const float* y, int64_t incy, float* a, int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event ger(backend_selector<backend::rocblas> selector, int64_t m, int64_t n, double alpha,
                 const double* x, int64_t incx, const double* y, int64_t incy, double* a,
                 int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
-                                                       y, incy, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx,
+                                                        y, incy, a, lda, dependencies);
     return done;
 }
 
 sycl::event trsm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a,
                  int64_t lda, float* b, int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -2958,9 +2962,9 @@ sycl::event trsm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha,
                  const double* a, int64_t lda, double* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -2968,9 +2972,9 @@ sycl::event trsm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, int64_t lda, std::complex<float>* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -2978,9 +2982,9 @@ sycl::event trsm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, int64_t lda, std::complex<double>* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -2989,7 +2993,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        float alpha, const float* a, int64_t lda, int64_t stride_a, float* b,
                        int64_t ldb, int64_t stride_b, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3000,7 +3004,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        double alpha, const double* a, int64_t lda, int64_t stride_a, double* b,
                        int64_t ldb, int64_t stride_b, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3011,7 +3015,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        std::complex<float> alpha, const std::complex<float>* a, int64_t lda,
                        int64_t stride_a, std::complex<float>* b, int64_t ldb, int64_t stride_b,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3022,7 +3026,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side left_ri
                        std::complex<double> alpha, const std::complex<double>* a, int64_t lda,
                        int64_t stride_a, std::complex<double>* b, int64_t ldb, int64_t stride_b,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
         stride_a, b, ldb, stride_b, batch_size, dependencies);
     return done;
@@ -3033,7 +3037,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3044,7 +3048,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3055,7 +3059,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        std::complex<float>* alpha, const std::complex<float>** a, int64_t* lda,
                        std::complex<float>** b, int64_t* ldb, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3066,7 +3070,7 @@ sycl::event trsm_batch(backend_selector<backend::rocblas> selector, side* left_r
                        std::complex<double>* alpha, const std::complex<double>** a, int64_t* lda,
                        std::complex<double>** b, int64_t* ldb, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch(
         selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b,
         ldb, group_count, group_size, dependencies);
     return done;
@@ -3076,8 +3080,8 @@ sycl::event dotu(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<float>* x, int64_t incx, const std::complex<float>* y,
                  int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
@@ -3085,8 +3089,8 @@ sycl::event dotu(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<double>* x, int64_t incx, const std::complex<double>* y,
                  int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
@@ -3095,9 +3099,9 @@ sycl::event hemm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t lda, const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
                  std::complex<float>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3106,9 +3110,9 @@ sycl::event hemm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t lda, const std::complex<double>* b, int64_t ldb, std::complex<double> beta,
                  std::complex<double>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3116,8 +3120,8 @@ sycl::event hpr2(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<float> alpha, const std::complex<float>* x, int64_t incx,
                  const std::complex<float>* y, int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3125,8 +3129,8 @@ sycl::event hpr2(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  std::complex<double> alpha, const std::complex<double>* x, int64_t incx,
                  const std::complex<double>* y, int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
@@ -3135,8 +3139,8 @@ sycl::event gbmv(backend_selector<backend::rocblas> selector, transpose trans, i
                  int64_t incx, float beta, float* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                 a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3145,8 +3149,8 @@ sycl::event gbmv(backend_selector<backend::rocblas> selector, transpose trans, i
                  const double* x, int64_t incx, double beta, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                 a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3156,8 +3160,8 @@ sycl::event gbmv(backend_selector<backend::rocblas> selector, transpose trans, i
                  std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                 a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3167,15 +3171,15 @@ sycl::event gbmv(backend_selector<backend::rocblas> selector, transpose trans, i
                  std::complex<double> beta, std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha,
+                                                 a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3183,7 +3187,7 @@ sycl::event tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3192,7 +3196,7 @@ sycl::event tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, int64_t k, const std::complex<float>* a, int64_t lda,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3201,7 +3205,7 @@ sycl::event tbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, int64_t k, const std::complex<double>* a, int64_t lda,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbmv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3210,9 +3214,9 @@ sycl::event symm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t m, int64_t n, float alpha, const float* a, int64_t lda, const float* b,
                  int64_t ldb, float beta, float* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3220,9 +3224,9 @@ sycl::event symm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t m, int64_t n, double alpha, const double* a, int64_t lda, const double* b,
                  int64_t ldb, double beta, double* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3231,9 +3235,9 @@ sycl::event symm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t lda, const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
                  std::complex<float>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3242,9 +3246,9 @@ sycl::event symm(backend_selector<backend::rocblas> selector, side left_right, u
                  int64_t lda, const std::complex<double>* b, int64_t ldb, std::complex<double> beta,
                  std::complex<double>* c, int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n,
-                                                alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right,
+                                                         upper_lower, m, n, alpha, a, lda, b, ldb,
+                                                         beta, c, ldc, dependencies);
     return done;
 }
 
@@ -3252,8 +3256,8 @@ sycl::event dotc(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<float>* x, int64_t incx, const std::complex<float>* y,
                  int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
@@ -3261,33 +3265,33 @@ sycl::event dotc(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<double>* x, int64_t incx, const std::complex<double>* y,
                  int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
-                                                        result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy,
+                                                         result, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 float alpha, const float* x, int64_t incx, float* a, int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event syr(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                 double alpha, const double* x, int64_t incx, double* a, int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
-                                                       x, incx, a, lda, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha,
+                                                        x, incx, a, lda, dependencies);
     return done;
 }
 
 sycl::event trmm(backend_selector<backend::rocblas> selector, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a,
                  int64_t lda, float* b, int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3295,9 +3299,9 @@ sycl::event trmm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha,
                  const double* a, int64_t lda, double* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3305,9 +3309,9 @@ sycl::event trmm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, int64_t lda, std::complex<float>* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3315,62 +3319,62 @@ sycl::event trmm(backend_selector<backend::rocblas> selector, side left_right, u
                  transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, int64_t lda, std::complex<double>* b, int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
-                                                        upper_lower, trans, unit_diag, m, n, alpha,
-                                                        a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right,
+                                                         upper_lower, trans, unit_diag, m, n, alpha,
+                                                         a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::rocblas> selector, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
-                                                         param, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                          param, dependencies);
     return done;
 }
 
 sycl::event rotmg(backend_selector<backend::rocblas> selector, double* d1, double* d2, double* x1,
                   double y1, double* param, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
-                                                         param, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1,
+                                                          param, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const float* a, float* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const double* a, double* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const std::complex<float>* a, std::complex<float>* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event tpsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const std::complex<double>* a, std::complex<double>* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
-                                                        unit_diag, n, a, x, incx, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans,
+                                                         unit_diag, n, a, x, incx, dependencies);
     return done;
 }
 
 sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3378,7 +3382,7 @@ sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3387,7 +3391,7 @@ sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, const std::complex<float>* a, int64_t lda,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3396,7 +3400,7 @@ sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, const std::complex<double>* a, int64_t lda,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::trsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3404,39 +3408,39 @@ sycl::event trsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event copy(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                  int64_t incx, float* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                  int64_t incx, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<float>* x, int64_t incx, std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event copy(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<double>* x, int64_t incx, std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy,
+                                                         dependencies);
     return done;
 }
 
 sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n, const float** x,
                        int64_t* incx, float** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3444,7 +3448,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
 sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n, const double** x,
                        int64_t* incx, double** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3453,7 +3457,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        const std::complex<float>** x, int64_t* incx, std::complex<float>** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3462,7 +3466,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
                        const std::complex<double>** x, int64_t* incx, std::complex<double>** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies);
     return done;
 }
@@ -3470,7 +3474,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t* n,
 sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                        int64_t incx, int64_t stridex, float* y, int64_t incy, int64_t stridey,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3478,7 +3482,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t n, c
 sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                        int64_t incx, int64_t stridex, double* y, int64_t incy, int64_t stridey,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3487,7 +3491,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                        const std::complex<float>* x, int64_t incx, int64_t stridex,
                        std::complex<float>* y, int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3496,7 +3500,7 @@ sycl::event copy_batch(backend_selector<backend::rocblas> selector, int64_t n,
                        const std::complex<double>* x, int64_t incx, int64_t stridex,
                        std::complex<double>* y, int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch(
         selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
     return done;
 }
@@ -3506,7 +3510,7 @@ sycl::event hemv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  const std::complex<float>* x, int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hemv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3516,7 +3520,7 @@ sycl::event hemv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  const std::complex<double>* x, int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::hemv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::hemv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3525,9 +3529,9 @@ sycl::event gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   transpose transb, int64_t n, int64_t k, float alpha, const float* a, int64_t lda,
                   const float* b, int64_t ldb, float beta, float* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                         c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                          c, ldc, dependencies);
     return done;
 }
 
@@ -3535,9 +3539,9 @@ sycl::event gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   transpose transb, int64_t n, int64_t k, double alpha, const double* a,
                   int64_t lda, const double* b, int64_t ldb, double beta, double* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                         c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                          c, ldc, dependencies);
     return done;
 }
 
@@ -3546,9 +3550,9 @@ sycl::event gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   const std::complex<float>* a, int64_t lda, const std::complex<float>* b,
                   int64_t ldb, std::complex<float> beta, std::complex<float>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                         c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                          c, ldc, dependencies);
     return done;
 }
 
@@ -3557,9 +3561,9 @@ sycl::event gemmt(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   const std::complex<double>* a, int64_t lda, const std::complex<double>* b,
                   int64_t ldb, std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
-                                                         transb, n, k, alpha, a, lda, b, ldb, beta,
-                                                         c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa,
+                                                          transb, n, k, alpha, a, lda, b, ldb, beta,
+                                                          c, ldc, dependencies);
     return done;
 }
 
@@ -3567,8 +3571,8 @@ sycl::event sbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t k, float alpha, const float* a, int64_t lda, const float* x, int64_t incx,
                  float beta, float* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                 lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
@@ -3577,45 +3581,45 @@ sycl::event sbmv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  int64_t incx, double beta, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
-                                                lda, x, incx, beta, y, incy, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a,
+                                                 lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<float>* x, int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::rocblas> selector, int64_t n,
                  const std::complex<double>* x, int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                  int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event asum(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                  int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
-                                                        dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result,
+                                                         dependencies);
     return done;
 }
 
 sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3623,7 +3627,7 @@ sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x,
                  int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3632,7 +3636,7 @@ sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, int64_t k, const std::complex<float>* a, int64_t lda,
                  std::complex<float>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3641,7 +3645,7 @@ sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  diag unit_diag, int64_t n, int64_t k, const std::complex<double>* a, int64_t lda,
                  std::complex<double>* x, int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::tbsv(
         selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
     return done;
 }
@@ -3649,76 +3653,76 @@ sycl::event tbsv(backend_selector<backend::rocblas> selector, uplo upper_lower,
 sycl::event spr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event spr2(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  double alpha, const double* x, int64_t incx, const double* y, int64_t incy,
                  double* a, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha,
-                                                        x, incx, y, incy, a, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n,
+                                                         alpha, x, incx, y, incy, a, dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                   int64_t incx, int64_t* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                   int64_t incx, int64_t* result, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::rocblas> selector, int64_t n,
                   const std::complex<float>* x, int64_t incx, int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event iamax(backend_selector<backend::rocblas> selector, int64_t n,
                   const std::complex<double>* x, int64_t incx, int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
-                                                         dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result,
+                                                          dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::rocblas> selector, int64_t n, float* x, int64_t incx,
                  float* y, int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                        param, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotm(backend_selector<backend::rocblas> selector, int64_t n, double* x, int64_t incx,
                  double* y, int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
-                                                        param, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy,
+                                                         param, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::rocblas> selector, float* a, float* b, float* c,
                  float* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event rotg(backend_selector<backend::rocblas> selector, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3726,7 +3730,7 @@ sycl::event rotg(backend_selector<backend::rocblas> selector, std::complex<float
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
@@ -3734,15 +3738,15 @@ sycl::event rotg(backend_selector<backend::rocblas> selector, std::complex<doubl
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
+        oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies);
     return done;
 }
 
 sycl::event sdsdot(backend_selector<backend::rocblas> selector, int64_t n, float sb, const float* x,
                    int64_t incx, const float* y, int64_t incy, float* result,
                    const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
-                                                          incy, result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y,
+                                                           incy, result, dependencies);
     return done;
 }
 
@@ -3751,9 +3755,9 @@ sycl::event her2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t lda, const std::complex<float>* b, int64_t ldb, float beta,
                   std::complex<float>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
@@ -3762,40 +3766,40 @@ sycl::event her2k(backend_selector<backend::rocblas> selector, uplo upper_lower,
                   int64_t lda, const std::complex<double>* b, int64_t ldb, double beta,
                   std::complex<double>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    auto done =
-        oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k,
-                                                 alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans,
+                                                          n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+                                                          dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                 int64_t incx, const float* y, int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::rocblas> selector, int64_t n, const double* x,
                 int64_t incx, const double* y, int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event dot(backend_selector<backend::rocblas> selector, int64_t n, const float* x,
                 int64_t incx, const float* y, int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
-                                                       result, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy,
+                                                        result, dependencies);
     return done;
 }
 
 sycl::event symv(backend_selector<backend::rocblas> selector, uplo upper_lower, int64_t n,
                  float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta,
                  float* y, int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::symv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3804,7 +3808,7 @@ sycl::event symv(backend_selector<backend::rocblas> selector, uplo upper_lower,
                  double alpha, const double* a, int64_t lda, const double* x, int64_t incx,
                  double beta, double* y, int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::symv(
+    auto done = oneapi::math::blas::rocblas::MAJOR::symv(
         selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
     return done;
 }
@@ -3814,7 +3818,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3825,7 +3829,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3836,7 +3840,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3847,7 +3851,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size,
         dependencies);
     return done;
@@ -3857,7 +3861,7 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3866,7 +3870,7 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3876,7 +3880,7 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3886,7 +3890,7 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
     return done;
 }
@@ -3897,7 +3901,7 @@ sycl::event omatadd_batch(backend_selector<backend::rocblas> selector, transpose
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -3909,7 +3913,7 @@ sycl::event omatadd_batch(backend_selector<backend::rocblas> selector, transpose
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
                           std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -3922,7 +3926,7 @@ sycl::event omatadd_batch(backend_selector<backend::rocblas> selector, transpose
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -3935,7 +3939,7 @@ sycl::event omatadd_batch(backend_selector<backend::rocblas> selector, transpose
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                           std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
                           std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch(
         selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b,
         c, ldc, stride_c, batch_size, dependencies);
     return done;
@@ -3944,16 +3948,16 @@ sycl::event omatadd_batch(backend_selector<backend::rocblas> selector, transpose
 sycl::event omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
 sycl::event omatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3961,8 +3965,8 @@ sycl::event omatcopy(backend_selector<backend::rocblas> selector, transpose tran
                      std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                      std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3970,8 +3974,8 @@ sycl::event omatcopy(backend_selector<backend::rocblas> selector, transpose tran
                      std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                      std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, a, lda, b, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, a, lda, b, ldb, dependencies);
     return done;
 }
 
@@ -3979,7 +3983,7 @@ sycl::event omatcopy2(backend_selector<backend::rocblas> selector, transpose tra
                       std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -3988,7 +3992,7 @@ sycl::event omatcopy2(backend_selector<backend::rocblas> selector, transpose tra
                       std::int64_t n, double alpha, const double* a, std::int64_t lda,
                       std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -3998,7 +4002,7 @@ sycl::event omatcopy2(backend_selector<backend::rocblas> selector, transpose tra
                       std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4008,7 +4012,7 @@ sycl::event omatcopy2(backend_selector<backend::rocblas> selector, transpose tra
                       std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
                       std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2(
         selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
     return done;
 }
@@ -4016,16 +4020,16 @@ sycl::event omatcopy2(backend_selector<backend::rocblas> selector, transpose tra
 sycl::event imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
 sycl::event imatcopy(backend_selector<backend::rocblas> selector, transpose trans, std::int64_t m,
                      std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4033,8 +4037,8 @@ sycl::event imatcopy(backend_selector<backend::rocblas> selector, transpose tran
                      std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4042,8 +4046,8 @@ sycl::event imatcopy(backend_selector<backend::rocblas> selector, transpose tran
                      std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                      std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
-                                                            alpha, ab, lda, ldb, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n,
+                                                             alpha, ab, lda, ldb, dependencies);
     return done;
 }
 
@@ -4051,9 +4055,9 @@ sycl::event omatadd(backend_selector<backend::rocblas> selector, transpose trans
                     std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                     float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                           dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                            dependencies);
     return done;
 }
 
@@ -4061,9 +4065,9 @@ sycl::event omatadd(backend_selector<backend::rocblas> selector, transpose trans
                     std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                     double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                           dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                            dependencies);
     return done;
 }
 
@@ -4072,9 +4076,9 @@ sycl::event omatadd(backend_selector<backend::rocblas> selector, transpose trans
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                           dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                            dependencies);
     return done;
 }
 
@@ -4083,9 +4087,9 @@ sycl::event omatadd(backend_selector<backend::rocblas> selector, transpose trans
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
-                                                           n, alpha, a, lda, beta, b, ldb, c, ldc,
-                                                           dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m,
+                                                            n, alpha, a, lda, beta, b, ldb, c, ldc,
+                                                            dependencies);
     return done;
 }
 
@@ -4094,7 +4098,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4105,7 +4109,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4116,7 +4120,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4127,7 +4131,7 @@ sycl::event omatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(
+    auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(
         selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize,
         dependencies);
     return done;
@@ -4137,9 +4141,9 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                  alpha, ab, lda, ldb, group_count,
-                                                                  groupsize, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4147,9 +4151,9 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                  alpha, ab, lda, ldb, group_count,
-                                                                  groupsize, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4158,9 +4162,9 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                  alpha, ab, lda, ldb, group_count,
-                                                                  groupsize, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
 
@@ -4169,8 +4173,8 @@ sycl::event imatcopy_batch(backend_selector<backend::rocblas> selector, transpos
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n,
-                                                                  alpha, ab, lda, ldb, group_count,
-                                                                  groupsize, dependencies);
+    auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(
+        selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize,
+        dependencies);
     return done;
 }
diff --git a/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp b/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp
similarity index 75%
rename from include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp
rename to include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp
index a642e5609..45f4f888d 100644
--- a/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp
+++ b/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp
@@ -18,8 +18,8 @@
 *  limitations under the License.
 *
 **************************************************************************/
-#ifndef _ONEMKL_BLAS_ROCBLAS_HPP_
-#define _ONEMKL_BLAS_ROCBLAS_HPP_
+#ifndef _ONEMATH_BLAS_ROCBLAS_HPP_
+#define _ONEMATH_BLAS_ROCBLAS_HPP_
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
@@ -28,31 +28,31 @@
 #include <complex>
 #include <cstdint>
 #include <string>
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/config.hpp"
 
 namespace oneapi {
-namespace mkl {
-using oneapi::mkl::diag;
-using oneapi::mkl::offset;
-using oneapi::mkl::side;
-using oneapi::mkl::transpose;
-using oneapi::mkl::uplo;
+namespace math {
+using oneapi::math::diag;
+using oneapi::math::offset;
+using oneapi::math::side;
+using oneapi::math::transpose;
+using oneapi::math::uplo;
 namespace blas {
 namespace rocblas {
 namespace column_major {
 
-#include "onemkl_blas_rocblas.hxx"
+#include "onemath_blas_rocblas.hxx"
 
 } //namespace column_major
 namespace row_major {
 
-#include "onemkl_blas_rocblas.hxx"
+#include "onemath_blas_rocblas.hxx"
 
 } //namespace row_major
 } //namespace rocblas
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BLAS_ROCBLAS_HPP_
+#endif //_ONEMATH_BLAS_ROCBLAS_HPP_
diff --git a/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx b/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx
similarity index 100%
rename from include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx
rename to include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx
diff --git a/include/oneapi/mkl/detail/backend_selector.hpp b/include/oneapi/math/detail/backend_selector.hpp
similarity index 78%
rename from include/oneapi/mkl/detail/backend_selector.hpp
rename to include/oneapi/math/detail/backend_selector.hpp
index b0c763ae0..905d3ac01 100644
--- a/include/oneapi/mkl/detail/backend_selector.hpp
+++ b/include/oneapi/math/detail/backend_selector.hpp
@@ -17,15 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BACKEND_SELECTOR_HPP_
-#define _ONEMKL_BACKEND_SELECTOR_HPP_
+#ifndef _ONEMATH_BACKEND_SELECTOR_HPP_
+#define _ONEMATH_BACKEND_SELECTOR_HPP_
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/detail/backend_selector_predicates.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/detail/backend_selector_predicates.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 template <backend Backend>
 class backend_selector {
@@ -41,7 +41,7 @@ class backend_selector {
     sycl::queue queue_;
 };
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_BACKEND_SELECTOR_HPP_
+#endif //_ONEMATH_BACKEND_SELECTOR_HPP_
diff --git a/include/oneapi/mkl/detail/backend_selector_predicates.hpp b/include/oneapi/math/detail/backend_selector_predicates.hpp
similarity index 88%
rename from include/oneapi/mkl/detail/backend_selector_predicates.hpp
rename to include/oneapi/math/detail/backend_selector_predicates.hpp
index 4ee3f3bb1..3a08f980e 100644
--- a/include/oneapi/mkl/detail/backend_selector_predicates.hpp
+++ b/include/oneapi/math/detail/backend_selector_predicates.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BACKEND_SELECTOR_PREDICATES_HPP_
-#define _ONEMKL_BACKEND_SELECTOR_PREDICATES_HPP_
+#ifndef _ONEMATH_BACKEND_SELECTOR_PREDICATES_HPP_
+#define _ONEMATH_BACKEND_SELECTOR_PREDICATES_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,19 +27,19 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 template <backend Backend>
 inline void backend_selector_precondition(sycl::queue&) {}
 
 template <>
 inline void backend_selector_precondition<backend::netlib>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
 #ifdef __HIPSYCL__
     if (!(queue.is_host() || queue.get_device().is_cpu())) {
 #else
@@ -54,7 +54,7 @@ inline void backend_selector_precondition<backend::netlib>(sycl::queue& queue) {
 
 template <>
 inline void backend_selector_precondition<backend::mklcpu>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
 #ifdef __HIPSYCL__
     if (!(queue.is_host() || queue.get_device().is_cpu())) {
 #else
@@ -69,7 +69,7 @@ inline void backend_selector_precondition<backend::mklcpu>(sycl::queue& queue) {
 
 template <>
 inline void backend_selector_precondition<backend::mklgpu>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == INTEL_ID)) {
@@ -82,7 +82,7 @@ inline void backend_selector_precondition<backend::mklgpu>(sycl::queue& queue) {
 
 template <>
 inline void backend_selector_precondition<backend::cublas>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == NVIDIA_ID)) {
@@ -95,7 +95,7 @@ inline void backend_selector_precondition<backend::cublas>(sycl::queue& queue) {
 
 template <>
 inline void backend_selector_precondition<backend::cusolver>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == NVIDIA_ID)) {
@@ -108,7 +108,7 @@ inline void backend_selector_precondition<backend::cusolver>(sycl::queue& queue)
 
 template <>
 inline void backend_selector_precondition<backend::rocblas>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == AMD_ID)) {
@@ -121,7 +121,7 @@ inline void backend_selector_precondition<backend::rocblas>(sycl::queue& queue)
 
 template <>
 inline void backend_selector_precondition<backend::rocrand>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == AMD_ID)) {
@@ -134,7 +134,7 @@ inline void backend_selector_precondition<backend::rocrand>(sycl::queue& queue)
 
 template <>
 inline void backend_selector_precondition<backend::rocsolver>(sycl::queue& queue) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     unsigned int vendor_id =
         static_cast<unsigned int>(queue.get_device().get_info<sycl::info::device::vendor_id>());
     if (!(queue.get_device().is_gpu() && vendor_id == AMD_ID)) {
@@ -144,7 +144,7 @@ inline void backend_selector_precondition<backend::rocsolver>(sycl::queue& queue
     }
 #endif
 }
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_BACKEND_SELECTOR_PREDICATES_HPP_
+#endif //_ONEMATH_BACKEND_SELECTOR_PREDICATES_HPP_
diff --git a/include/oneapi/mkl/detail/backends.hpp b/include/oneapi/math/detail/backends.hpp
similarity index 94%
rename from include/oneapi/mkl/detail/backends.hpp
rename to include/oneapi/math/detail/backends.hpp
index 216a6feba..e8f66e021 100644
--- a/include/oneapi/mkl/detail/backends.hpp
+++ b/include/oneapi/math/detail/backends.hpp
@@ -17,14 +17,14 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BACKENDS_HPP_
-#define _ONEMKL_BACKENDS_HPP_
+#ifndef _ONEMATH_BACKENDS_HPP_
+#define _ONEMATH_BACKENDS_HPP_
 
 #include <map>
 #include <string>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 enum class backend {
     mklcpu,
@@ -66,7 +66,7 @@ static backendmap backend_map = { { backend::mklcpu, "mklcpu" },
                                   { backend::unsupported, "unsupported" } };
 // clang-format on
 
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BACKENDS_HPP_
+#endif //_ONEMATH_BACKENDS_HPP_
diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/math/detail/backends_table.hpp
similarity index 73%
rename from include/oneapi/mkl/detail/backends_table.hpp
rename to include/oneapi/math/detail/backends_table.hpp
index 9b7c921d6..1b7e1d723 100644
--- a/include/oneapi/mkl/detail/backends_table.hpp
+++ b/include/oneapi/math/detail/backends_table.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BACKENDS_TABLE_HPP_
-#define _ONEMKL_BACKENDS_TABLE_HPP_
+#ifndef _ONEMATH_BACKENDS_TABLE_HPP_
+#define _ONEMATH_BACKENDS_TABLE_HPP_
 
 #include <string>
 #include <vector>
@@ -29,16 +29,16 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math/detail/config.hpp"
 
 #ifdef __linux__
-#define LIB_NAME(a) "libonemkl_" a ".so"
+#define LIB_NAME(a) "libonemath_" a ".so"
 #elif defined(_WIN64)
-#define LIB_NAME(a) "onemkl_" a ".dll"
+#define LIB_NAME(a) "onemath_" a ".dll"
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu, generic_device };
 enum class domain : uint16_t { blas, dft, lapack, rng, sparse_blas };
@@ -47,40 +47,40 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
     { domain::blas,
       { { device::x86cpu,
           {
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
               LIB_NAME("blas_mklcpu"),
 #endif
-#ifdef ONEMKL_ENABLE_NETLIB_BACKEND
+#ifdef ONEMATH_ENABLE_NETLIB_BACKEND
               LIB_NAME("blas_netlib"),
 #endif
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU
               LIB_NAME("blas_portblas"),
 #endif
           } },
         { device::intelgpu,
           {
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
               LIB_NAME("blas_mklgpu"),
 #endif
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU
               LIB_NAME("blas_portblas"),
 #endif
           } },
         { device::amdgpu,
           {
-#ifdef ONEMKL_ENABLE_ROCBLAS_BACKEND
+#ifdef ONEMATH_ENABLE_ROCBLAS_BACKEND
               LIB_NAME("blas_rocblas"),
 #endif
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU
               LIB_NAME("blas_portblas"),
 #endif
           } },
         { device::nvidiagpu,
           {
-#ifdef ONEMKL_ENABLE_CUBLAS_BACKEND
+#ifdef ONEMATH_ENABLE_CUBLAS_BACKEND
               LIB_NAME("blas_cublas"),
 #endif
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU
               LIB_NAME("blas_portblas"),
 #endif
           } },
@@ -94,37 +94,37 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
     { domain::dft,
       { { device::x86cpu,
           {
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
               LIB_NAME("dft_mklcpu")
 #endif
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
                   LIB_NAME("dft_portfft")
 #endif
           } },
         { device::intelgpu,
           {
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
               LIB_NAME("dft_mklgpu")
 #endif
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
                   LIB_NAME("dft_portfft")
 #endif
           } },
         { device::amdgpu,
           {
-#ifdef ONEMKL_ENABLE_ROCFFT_BACKEND
+#ifdef ONEMATH_ENABLE_ROCFFT_BACKEND
               LIB_NAME("dft_rocfft")
 #endif
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
                   LIB_NAME("dft_portfft")
 #endif
           } },
         { device::nvidiagpu,
           {
-#ifdef ONEMKL_ENABLE_CUFFT_BACKEND
+#ifdef ONEMATH_ENABLE_CUFFT_BACKEND
               LIB_NAME("dft_cufft")
 #endif
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
                   LIB_NAME("dft_portfft")
 #endif
           } },
@@ -138,25 +138,25 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
     { domain::lapack,
       { { device::x86cpu,
           {
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
               LIB_NAME("lapack_mklcpu")
 #endif
           } },
         { device::intelgpu,
           {
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
               LIB_NAME("lapack_mklgpu")
 #endif
           } },
         { device::amdgpu,
           {
-#ifdef ONEMKL_ENABLE_ROCSOLVER_BACKEND
+#ifdef ONEMATH_ENABLE_ROCSOLVER_BACKEND
               LIB_NAME("lapack_rocsolver")
 #endif
           } },
         { device::nvidiagpu,
           {
-#ifdef ONEMKL_ENABLE_CUSOLVER_BACKEND
+#ifdef ONEMATH_ENABLE_CUSOLVER_BACKEND
               LIB_NAME("lapack_cusolver")
 #endif
           } } } },
@@ -164,25 +164,25 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
     { domain::rng,
       { { device::x86cpu,
           {
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
               LIB_NAME("rng_mklcpu")
 #endif
           } },
         { device::intelgpu,
           {
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
               LIB_NAME("rng_mklgpu")
 #endif
           } },
         { device::amdgpu,
           {
-#ifdef ONEMKL_ENABLE_ROCRAND_BACKEND
+#ifdef ONEMATH_ENABLE_ROCRAND_BACKEND
               LIB_NAME("rng_rocrand")
 #endif
           } },
         { device::nvidiagpu,
           {
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND
               LIB_NAME("rng_curand")
 #endif
           } } } },
@@ -190,32 +190,32 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
     { domain::sparse_blas,
       { { device::x86cpu,
           {
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
               LIB_NAME("sparse_blas_mklcpu")
 #endif
           } },
         { device::intelgpu,
           {
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
               LIB_NAME("sparse_blas_mklgpu")
 #endif
           } },
         { device::nvidiagpu,
           {
-#ifdef ONEMKL_ENABLE_CUSPARSE_BACKEND
+#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
               LIB_NAME("sparse_blas_cusparse")
 #endif
           } } } },
 };
 
-static std::map<domain, const char*> table_names = { { domain::blas, "mkl_blas_table" },
-                                                     { domain::lapack, "mkl_lapack_table" },
-                                                     { domain::dft, "mkl_dft_table" },
-                                                     { domain::rng, "mkl_rng_table" },
+static std::map<domain, const char*> table_names = { { domain::blas, "onemath_blas_table" },
+                                                     { domain::lapack, "onemath_lapack_table" },
+                                                     { domain::dft, "onemath_dft_table" },
+                                                     { domain::rng, "onemath_rng_table" },
                                                      { domain::sparse_blas,
-                                                       "mkl_sparse_blas_table" } };
+                                                       "onemath_sparse_blas_table" } };
 
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_BACKENDS_TABLE_HPP_
+#endif //_ONEMATH_BACKENDS_TABLE_HPP_
diff --git a/include/oneapi/mkl/detail/exceptions.hpp b/include/oneapi/math/detail/exceptions.hpp
similarity index 54%
rename from include/oneapi/mkl/detail/exceptions.hpp
rename to include/oneapi/math/detail/exceptions.hpp
index 18eeca5b1..eb8cc0b12 100644
--- a/include/oneapi/mkl/detail/exceptions.hpp
+++ b/include/oneapi/math/detail/exceptions.hpp
@@ -17,43 +17,54 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-// These are oneAPI Math Kernel Library (oneMKL) Interfaces specific exceptions
+// These are oneMath-specific exceptions with no equivalent in Intel(R) oneMKL
 
-#ifndef _ONEMKL_DETAIL_EXCEPTIONS_HPP_
-#define _ONEMKL_DETAIL_EXCEPTIONS_HPP_
+#ifndef _ONEMATH_DETAIL_EXCEPTIONS_HPP_
+#define _ONEMATH_DETAIL_EXCEPTIONS_HPP_
 
 #include <exception>
 #include <string>
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
-class backend_not_found : public oneapi::mkl::exception {
+class backend_not_found : public oneapi::math::exception {
 public:
     backend_not_found(const std::string& info = "")
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   "", "", ((info.length() != 0) ? info : "Couldn't load selected backend")) {}
 };
 
-class function_not_found : public oneapi::mkl::exception {
+class function_not_found : public oneapi::math::exception {
 public:
     function_not_found(const std::string& info = "")
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   "", "",
                   ((info.length() != 0) ? info : "Couldn't load functions from selected backend")) {
     }
 };
 
-class specification_mismatch : public oneapi::mkl::exception {
+class library_not_found : public oneapi::math::exception {
+public:
+    library_not_found(const std::string& message) : exception(message) {}
+    library_not_found(const std::string& domain, const std::string& function,
+                      const std::string& info = "")
+            : oneapi::math::exception(
+                  domain, function,
+                  "library not found" + ((info.length() != 0) ? (": " + info) : "")) {}
+};
+
+class specification_mismatch : public oneapi::math::exception {
 public:
     specification_mismatch(const std::string& info = "")
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   "", "",
-                  ((info.length() != 0) ? info : "Loaded oneMKL specification version mismatch")) {}
+                  ((info.length() != 0) ? info : "Loaded oneMath specification version mismatch")) {
+    }
 };
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_DETAIL_EXCEPTIONS_HPP_
+#endif // _ONEMATH_DETAIL_EXCEPTIONS_HPP_
diff --git a/include/oneapi/mkl/detail/export.hpp b/include/oneapi/math/detail/export.hpp
similarity index 70%
rename from include/oneapi/mkl/detail/export.hpp
rename to include/oneapi/math/detail/export.hpp
index 55f6a7b15..851fe9d2f 100644
--- a/include/oneapi/mkl/detail/export.hpp
+++ b/include/oneapi/math/detail/export.hpp
@@ -17,24 +17,24 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef ONEMKL_EXPORT_H
-#define ONEMKL_EXPORT_H
+#ifndef ONEMATH_EXPORT_H
+#define ONEMATH_EXPORT_H
 
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math/detail/config.hpp"
 
-#if !defined(ONEMKL_BUILD_SHARED_LIBS) || !defined(_WIN64)
-#define ONEMKL_EXPORT
-#define ONEMKL_NO_EXPORT
+#if !defined(ONEMATH_BUILD_SHARED_LIBS) || !defined(_WIN64)
+#define ONEMATH_EXPORT
+#define ONEMATH_NO_EXPORT
 #else
-#ifndef ONEMKL_EXPORT
-#ifdef onemkl_EXPORTS
+#ifndef ONEMATH_EXPORT
+#ifdef onemath_EXPORTS
 /* We are building this library */
-#define ONEMKL_EXPORT __declspec(dllexport)
+#define ONEMATH_EXPORT __declspec(dllexport)
 #else
 /* We are using this library */
-#define ONEMKL_EXPORT __declspec(dllimport)
+#define ONEMATH_EXPORT __declspec(dllimport)
 #endif
 #endif
 #endif
 
-#endif /* ONEMKL_EXPORT_H */
+#endif /* ONEMATH_EXPORT_H */
diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/math/detail/get_device_id.hpp
similarity index 84%
rename from include/oneapi/mkl/detail/get_device_id.hpp
rename to include/oneapi/math/detail/get_device_id.hpp
index 2eb9f07c9..32d493059 100644
--- a/include/oneapi/mkl/detail/get_device_id.hpp
+++ b/include/oneapi/math/detail/get_device_id.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_GET_DEVICE_ID_HPP_
-#define _ONEMKL_GET_DEVICE_ID_HPP_
+#ifndef _ONEMATH_GET_DEVICE_ID_HPP_
+#define _ONEMATH_GET_DEVICE_ID_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,8 +26,8 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/backends_table.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/detail/backends_table.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 #define INTEL_ID  32902
 #define NVIDIA_ID 4318
@@ -38,10 +38,10 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
-inline oneapi::mkl::device get_device_id(sycl::queue& queue) {
-    oneapi::mkl::device device_id;
+inline oneapi::math::device get_device_id(sycl::queue& queue) {
+    oneapi::math::device device_id;
     if (queue.get_device().is_cpu())
         device_id = device::x86cpu;
 #ifdef __HIPSYCL__
@@ -67,7 +67,7 @@ inline oneapi::mkl::device get_device_id(sycl::queue& queue) {
     return device_id;
 }
 
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_GET_DEVICE_ID_HPP_
+#endif //_ONEMATH_GET_DEVICE_ID_HPP_
diff --git a/include/oneapi/math/dft.hpp b/include/oneapi/math/dft.hpp
new file mode 100644
index 000000000..99d830bee
--- /dev/null
+++ b/include/oneapi/math/dft.hpp
@@ -0,0 +1,39 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_DFT_HPP_
+#define _ONEMATH_DFT_HPP_
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
+#include <complex>
+#include <cstdint>
+
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
+#include "oneapi/math/dft/detail/dft_loader.hpp"
+
+#include "oneapi/math/dft/descriptor.hpp"
+#include "oneapi/math/dft/forward.hpp"
+#include "oneapi/math/dft/backward.hpp"
+
+#endif // _ONEMATH_DFT_HPP_
diff --git a/include/oneapi/mkl/dft/backward.hpp b/include/oneapi/math/dft/backward.hpp
similarity index 98%
rename from include/oneapi/mkl/dft/backward.hpp
rename to include/oneapi/math/dft/backward.hpp
index becca85d0..6d7f7f122 100644
--- a/include/oneapi/mkl/dft/backward.hpp
+++ b/include/oneapi/math/dft/backward.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_BACKWARD_HPP_
-#define _ONEMKL_DFT_BACKWARD_HPP_
+#ifndef _ONEMATH_DFT_BACKWARD_HPP_
+#define _ONEMATH_DFT_BACKWARD_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -28,7 +28,7 @@
 
 #include "detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft {
+namespace oneapi::math::dft {
 //Buffer version
 
 //In-place transform
@@ -157,6 +157,6 @@ sycl::event compute_backward(descriptor_type& desc, input_type* in_re, input_typ
                                             reinterpret_cast<scalar_type*>(out_re),
                                             reinterpret_cast<scalar_type*>(out_im), dependencies);
 }
-} // namespace oneapi::mkl::dft
+} // namespace oneapi::math::dft
 
-#endif // _ONEMKL_DFT_BACKWARD_HPP_
+#endif // _ONEMATH_DFT_BACKWARD_HPP_
diff --git a/include/oneapi/mkl/dft/descriptor.hpp b/include/oneapi/math/dft/descriptor.hpp
similarity index 88%
rename from include/oneapi/mkl/dft/descriptor.hpp
rename to include/oneapi/math/dft/descriptor.hpp
index fb618fd42..e12d4ced1 100644
--- a/include/oneapi/mkl/dft/descriptor.hpp
+++ b/include/oneapi/math/dft/descriptor.hpp
@@ -17,14 +17,14 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_DESCRIPTOR_HPP_
-#define _ONEMKL_DFT_DESCRIPTOR_HPP_
+#ifndef _ONEMATH_DFT_DESCRIPTOR_HPP_
+#define _ONEMATH_DFT_DESCRIPTOR_HPP_
 
 #include "detail/descriptor_impl.hpp"
 #include "types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 /** The detail namespace is required since the MKLGPU backend uses identical 
 names and function signatures in many places. **/
@@ -32,7 +32,7 @@ names and function signatures in many places. **/
 template <precision prec, domain dom>
 using descriptor = detail::descriptor<prec, dom>;
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_DFT_DESCRIPTOR_HPP_
+#endif // _ONEMATH_DFT_DESCRIPTOR_HPP_
diff --git a/include/oneapi/mkl/dft/detail/commit_impl.hpp b/include/oneapi/math/dft/detail/commit_impl.hpp
similarity index 94%
rename from include/oneapi/mkl/dft/detail/commit_impl.hpp
rename to include/oneapi/math/dft/detail/commit_impl.hpp
index 0c1a1e0b2..6aaa92580 100644
--- a/include/oneapi/mkl/dft/detail/commit_impl.hpp
+++ b/include/oneapi/math/dft/detail/commit_impl.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_COMMIT_IMPL_HPP_
-#define _ONEMKL_DFT_COMMIT_IMPL_HPP_
+#ifndef _ONEMATH_DFT_COMMIT_IMPL_HPP_
+#define _ONEMATH_DFT_COMMIT_IMPL_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -29,11 +29,11 @@
 #include "descriptor_impl.hpp"
 #include "external_workspace_helper.hpp"
 
-namespace oneapi::mkl {
+namespace oneapi::math {
 enum class backend;
 }
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 class dft_values;
@@ -41,10 +41,10 @@ class dft_values;
 template <precision prec, domain dom>
 class commit_impl {
     sycl::queue queue_;
-    mkl::backend backend_;
+    math::backend backend_;
 
 public:
-    using descriptor_type = typename oneapi::mkl::dft::detail::descriptor<prec, dom>;
+    using descriptor_type = typename oneapi::math::dft::detail::descriptor<prec, dom>;
     using fwd_type = typename descriptor_info<descriptor_type>::forward_type;
     using bwd_type = typename descriptor_info<descriptor_type>::backward_type;
     using scalar_type = typename descriptor_info<descriptor_type>::scalar_type;
@@ -53,7 +53,7 @@ class commit_impl {
     external_workspace_helper<prec, dom> external_workspace_helper_;
 
 public:
-    commit_impl(sycl::queue queue, mkl::backend backend,
+    commit_impl(sycl::queue queue, math::backend backend,
                 const dft::detail::dft_values<prec, dom>& config_values)
             : queue_(queue),
               backend_(backend),
@@ -69,7 +69,7 @@ class commit_impl {
         return queue_;
     }
 
-    mkl::backend get_backend() const noexcept {
+    math::backend get_backend() const noexcept {
         return backend_;
     }
 
@@ -179,6 +179,6 @@ class commit_impl {
     }
 };
 
-} // namespace oneapi::mkl::dft::detail
+} // namespace oneapi::math::dft::detail
 
-#endif //_ONEMKL_DFT_COMMIT_IMPL_HPP_
+#endif //_ONEMATH_DFT_COMMIT_IMPL_HPP_
diff --git a/include/oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp b/include/oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp
similarity index 74%
rename from include/oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp
rename to include/oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp
index fe3305680..c8b463d76 100644
--- a/include/oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp
+++ b/include/oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_ROCFFT_HPP_
-#define _ONEMKL_DFT_ROCFFT_HPP_
+#ifndef _ONEMATH_DFT_CUFFT_HPP_
+#define _ONEMATH_DFT_CUFFT_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,13 +26,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft::rocfft {
+namespace oneapi::math::dft::cufft {
 
-#include "oneapi/mkl/dft/detail/dft_ct.hxx"
+#include "oneapi/math/dft/detail/dft_ct.hxx"
 
-} // namespace oneapi::mkl::dft::rocfft
+} // namespace oneapi::math::dft::cufft
 
-#endif // _ONEMKL_DFT_ROCFFT_HPP_
+#endif // _ONEMATH_DFT_CUFFT_HPP_
diff --git a/include/oneapi/mkl/dft/detail/descriptor_impl.hpp b/include/oneapi/math/dft/detail/descriptor_impl.hpp
similarity index 80%
rename from include/oneapi/mkl/dft/detail/descriptor_impl.hpp
rename to include/oneapi/math/dft/detail/descriptor_impl.hpp
index b2095c49b..6267eb86d 100644
--- a/include/oneapi/mkl/dft/detail/descriptor_impl.hpp
+++ b/include/oneapi/math/dft/detail/descriptor_impl.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
-#define _ONEMKL_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
+#ifndef _ONEMATH_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
+#define _ONEMATH_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,14 +26,14 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/detail/export.hpp"
 
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 // Forward declaration:
@@ -55,9 +55,9 @@ class descriptor {
     // Syntax for d-dimensional DFT
     descriptor(std::vector<std::int64_t> dimensions);
 
-    // Copy operations are included in the oneAPI oneMKL specification, but not yet
+    // Copy operations are included in the oneAPI oneMath specification, but not yet
     // implemented here. If you need copies, please open an issue at
-    // https://github.com/oneapi-src/oneMKL/issues
+    // https://github.com/uxlfoundation/oneMath/issues
 
     descriptor(descriptor&&);
 
@@ -71,23 +71,23 @@ class descriptor {
 
     void commit(sycl::queue& queue);
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
     void commit(backend_selector<backend::mklcpu> selector);
 #endif
 
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
     void commit(backend_selector<backend::mklgpu> selector);
 #endif
 
-#ifdef ONEMKL_ENABLE_CUFFT_BACKEND
+#ifdef ONEMATH_ENABLE_CUFFT_BACKEND
     void commit(backend_selector<backend::cufft> selector);
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCFFT_BACKEND
+#ifdef ONEMATH_ENABLE_ROCFFT_BACKEND
     void commit(backend_selector<backend::rocfft> selector);
 #endif
 
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
     void commit(backend_selector<backend::portfft> selector);
 #endif
 
@@ -118,7 +118,7 @@ inline commit_impl<prec, dom>* get_commit(descriptor<prec, dom>& desc) {
 
 } // namespace detail
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
+#endif // _ONEMATH_DFT_DETAIL_DESCRIPTOR_IMPL_HPP_
diff --git a/include/oneapi/mkl/dft/detail/dft_ct.hxx b/include/oneapi/math/dft/detail/dft_ct.hxx
similarity index 60%
rename from include/oneapi/mkl/dft/detail/dft_ct.hxx
rename to include/oneapi/math/dft/detail/dft_ct.hxx
index 7fc2921e4..e03298c5e 100644
--- a/include/oneapi/mkl/dft/detail/dft_ct.hxx
+++ b/include/oneapi/math/dft/detail/dft_ct.hxx
@@ -20,7 +20,7 @@
 // Commit
 
 template <dft::detail::precision prec, dft::detail::domain dom>
-ONEMKL_EXPORT dft::detail::commit_impl<prec, dom>* create_commit(
+ONEMATH_EXPORT dft::detail::commit_impl<prec, dom>* create_commit(
     const dft::detail::descriptor<prec, dom>& desc, sycl::queue& sycl_queue);
 
 // BUFFER version
@@ -34,105 +34,107 @@ using bwd = typename detail::descriptor_info<descriptor_type>::backward_type;
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<fwd<descriptor_type>, 1>& inout);
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& inout);
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_im);
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im);
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descriptor_type>, 1>& in,
-                                   sycl::buffer<bwd<descriptor_type>, 1>& out);
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& in,
+                                    sycl::buffer<bwd<descriptor_type>, 1>& out);
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_im);
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im);
 
 //USM version
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                          const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                           const std::vector<sycl::event>& dependencies);
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                          scalar<descriptor_type>* inout_im,
-                                          const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
+                                           scalar<descriptor_type>* inout_im,
+                                           const std::vector<sycl::event>& dependencies);
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
-                                          bwd<descriptor_type>* out,
-                                          const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
+                                           bwd<descriptor_type>* out,
+                                           const std::vector<sycl::event>& dependencies);
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                          scalar<descriptor_type>* in_im,
-                                          scalar<descriptor_type>* out_re,
-                                          scalar<descriptor_type>* out_im,
-                                          const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                           scalar<descriptor_type>* in_im,
+                                           scalar<descriptor_type>* out_re,
+                                           scalar<descriptor_type>* out_im,
+                                           const std::vector<sycl::event>& dependencies);
 
 // BUFFER version
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& inout);
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& inout);
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im);
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_im);
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<bwd<descriptor_type>, 1>& in,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& out);
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<bwd<descriptor_type>, 1>& in,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& out);
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im);
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_im);
 
 //USM version
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                           const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                            const std::vector<sycl::event>& dependencies);
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                           scalar<descriptor_type>* inout_im,
-                                           const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc,
+                                            scalar<descriptor_type>* inout_re,
+                                            scalar<descriptor_type>* inout_im,
+                                            const std::vector<sycl::event>& dependencies);
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
-                                           fwd<descriptor_type>* out,
-                                           const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
+                                            fwd<descriptor_type>* out,
+                                            const std::vector<sycl::event>& dependencies);
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                           scalar<descriptor_type>* in_im,
-                                           scalar<descriptor_type>* out_re,
-                                           scalar<descriptor_type>* out_im,
-                                           const std::vector<sycl::event>& dependencies);
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                            scalar<descriptor_type>* in_im,
+                                            scalar<descriptor_type>* out_re,
+                                            scalar<descriptor_type>* out_im,
+                                            const std::vector<sycl::event>& dependencies);
diff --git a/include/oneapi/mkl/dft/detail/dft_loader.hpp b/include/oneapi/math/dft/detail/dft_loader.hpp
similarity index 74%
rename from include/oneapi/mkl/dft/detail/dft_loader.hpp
rename to include/oneapi/math/dft/detail/dft_loader.hpp
index f84a4e01c..34fb2da56 100644
--- a/include/oneapi/mkl/dft/detail/dft_loader.hpp
+++ b/include/oneapi/math/dft/detail/dft_loader.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_LOADER_HPP_
-#define _ONEMKL_DFT_LOADER_HPP_
+#ifndef _ONEMATH_DFT_LOADER_HPP_
+#define _ONEMATH_DFT_LOADER_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,11 +26,11 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 
@@ -41,12 +41,12 @@ template <precision prec, domain dom>
 class descriptor;
 
 template <precision prec, domain dom>
-ONEMKL_EXPORT commit_impl<prec, dom>* create_commit(const descriptor<prec, dom>& desc,
-                                                    sycl::queue& queue);
+ONEMATH_EXPORT commit_impl<prec, dom>* create_commit(const descriptor<prec, dom>& desc,
+                                                     sycl::queue& queue);
 
 } // namespace detail
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_DFT_LOADER_HPP_
+#endif //_ONEMATH_DFT_LOADER_HPP_
diff --git a/include/oneapi/mkl/dft/detail/external_workspace_helper.hpp b/include/oneapi/math/dft/detail/external_workspace_helper.hpp
similarity index 88%
rename from include/oneapi/mkl/dft/detail/external_workspace_helper.hpp
rename to include/oneapi/math/dft/detail/external_workspace_helper.hpp
index b41dffc4c..31d1d7ffc 100644
--- a/include/oneapi/mkl/dft/detail/external_workspace_helper.hpp
+++ b/include/oneapi/math/dft/detail/external_workspace_helper.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
-#define _ONEMKL_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
+#ifndef _ONEMATH_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
+#define _ONEMATH_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,12 +26,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 
@@ -90,8 +90,8 @@ class external_workspace_helper {
     */
     void set_workspace_throw(commit_impl_t& committed_desc, scalar_t* usm_workspace) {
         if (get_rqd_workspace_bytes(committed_desc) > 0 && usm_workspace == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_workspace",
-                                        "Backend expected a non-null workspace pointer.");
+            throw math::invalid_argument("DFT", "set_workspace",
+                                         "Backend expected a non-null workspace pointer.");
         }
         m_ext_workspace_rqd = true;
         m_workspace_type = ext_workspace_type::usm;
@@ -105,12 +105,12 @@ class external_workspace_helper {
                              sycl::buffer<scalar_t>& buffer_workspace) {
         if (static_cast<std::size_t>(get_rqd_workspace_bytes(committed_desc)) / sizeof(scalar_t) >
             buffer_workspace.size()) {
-            throw mkl::invalid_argument("DFT", "set_workspace", "Provided workspace is too small");
+            throw math::invalid_argument("DFT", "set_workspace", "Provided workspace is too small");
             return;
         }
         if (buffer_workspace.is_sub_buffer()) {
-            throw mkl::invalid_argument("DFT", "set_workspace",
-                                        "Cannot use sub-buffers for workspace");
+            throw math::invalid_argument("DFT", "set_workspace",
+                                         "Cannot use sub-buffers for workspace");
             return;
         }
         m_ext_workspace_rqd = true;
@@ -137,7 +137,7 @@ class external_workspace_helper {
                 }
             }
             else {
-                throw mkl::invalid_argument(
+                throw math::invalid_argument(
                     "DFT", function_name,
                     "Buffer external workspace must be used with buffer compute calls");
             }
@@ -169,7 +169,7 @@ class external_workspace_helper {
     */
     void usm_compute_call_throw(const char* function_name) const {
         if (m_ext_workspace_rqd && m_workspace_type != ext_workspace_type::usm) {
-            throw mkl::invalid_argument(
+            throw math::invalid_argument(
                 "DFT", function_name, "USM external workspace must be used with usm compute calls");
         }
     }
@@ -179,7 +179,7 @@ class external_workspace_helper {
     */
     void buffer_compute_call_throw(const char* function_name) const {
         if (m_ext_workspace_rqd && m_workspace_type != ext_workspace_type::buffer) {
-            throw mkl::invalid_argument(
+            throw math::invalid_argument(
                 "DFT", function_name,
                 "Buffer external workspace must be used with buffer compute calls");
         }
@@ -188,7 +188,7 @@ class external_workspace_helper {
 
 } // namespace detail
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
+#endif //_ONEMATH_DFT_EXTERNAL_WORKSPACE_HELPER_HPP_
diff --git a/include/oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp b/include/oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp
similarity index 73%
rename from include/oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp
rename to include/oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp
index 00d4dd47b..9cc8ef8c1 100644
--- a/include/oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp
+++ b/include/oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_MKLCPU_HPP_
-#define _ONEMKL_DFT_MKLCPU_HPP_
+#ifndef _ONEMATH_DFT_MKLCPU_HPP_
+#define _ONEMATH_DFT_MKLCPU_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,13 +26,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft::mklcpu {
+namespace oneapi::math::dft::mklcpu {
 
-#include "oneapi/mkl/dft/detail/dft_ct.hxx"
+#include "oneapi/math/dft/detail/dft_ct.hxx"
 
-} // namespace oneapi::mkl::dft::mklcpu
+} // namespace oneapi::math::dft::mklcpu
 
-#endif // _ONEMKL_DFT_MKLCPU_HPP_
+#endif // _ONEMATH_DFT_MKLCPU_HPP_
diff --git a/include/oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp b/include/oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp
similarity index 73%
rename from include/oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp
rename to include/oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp
index 56a55a9f7..3a2325178 100644
--- a/include/oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp
+++ b/include/oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_MKLGPU_HPP_
-#define _ONEMKL_DFT_MKLGPU_HPP_
+#ifndef _ONEMATH_DFT_MKLGPU_HPP_
+#define _ONEMATH_DFT_MKLGPU_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,13 +26,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft::mklgpu {
+namespace oneapi::math::dft::mklgpu {
 
-#include "oneapi/mkl/dft/detail/dft_ct.hxx"
+#include "oneapi/math/dft/detail/dft_ct.hxx"
 
-} // namespace oneapi::mkl::dft::mklgpu
+} // namespace oneapi::math::dft::mklgpu
 
-#endif // _ONEMKL_DFT_MKLGPU_HPP_
+#endif // _ONEMATH_DFT_MKLGPU_HPP_
diff --git a/include/oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp b/include/oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp
similarity index 76%
rename from include/oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp
rename to include/oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp
index 4617e8a5c..3739bc1aa 100644
--- a/include/oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp
+++ b/include/oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_PORTFFT_HPP_
-#define _ONEMKL_DFT_PORTFFT_HPP_
+#ifndef _ONEMATH_DFT_PORTFFT_HPP_
+#define _ONEMATH_DFT_PORTFFT_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,14 +26,14 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft::portfft {
+namespace oneapi::math::dft::portfft {
 
 // We don't need the forward declarations of compute_xxxward templates (just need the create_commit template), but it doesn't hurt and keeps things simple.
-#include "oneapi/mkl/dft/detail/dft_ct.hxx"
+#include "oneapi/math/dft/detail/dft_ct.hxx"
 
-} // namespace oneapi::mkl::dft::portfft
+} // namespace oneapi::math::dft::portfft
 
-#endif // _ONEMKL_DFT_PORTFFT_HPP_
+#endif // _ONEMATH_DFT_PORTFFT_HPP_
diff --git a/include/oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp b/include/oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp
similarity index 73%
rename from include/oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp
rename to include/oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp
index 4e4ad2030..fb5b4b276 100644
--- a/include/oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp
+++ b/include/oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_CUFFT_HPP_
-#define _ONEMKL_DFT_CUFFT_HPP_
+#ifndef _ONEMATH_DFT_ROCFFT_HPP_
+#define _ONEMATH_DFT_ROCFFT_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,13 +26,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft::cufft {
+namespace oneapi::math::dft::rocfft {
 
-#include "oneapi/mkl/dft/detail/dft_ct.hxx"
+#include "oneapi/math/dft/detail/dft_ct.hxx"
 
-} // namespace oneapi::mkl::dft::cufft
+} // namespace oneapi::math::dft::rocfft
 
-#endif // _ONEMKL_DFT_CUFFT_HPP_
+#endif // _ONEMATH_DFT_ROCFFT_HPP_
diff --git a/include/oneapi/mkl/dft/detail/types_impl.hpp b/include/oneapi/math/dft/detail/types_impl.hpp
similarity index 97%
rename from include/oneapi/mkl/dft/detail/types_impl.hpp
rename to include/oneapi/math/dft/detail/types_impl.hpp
index 5dad2302e..233a8bd0b 100644
--- a/include/oneapi/mkl/dft/detail/types_impl.hpp
+++ b/include/oneapi/math/dft/detail/types_impl.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DETAIL_TYPES_IMPL_HPP_
-#define _ONEMKL_DETAIL_TYPES_IMPL_HPP_
+#ifndef _ONEMATH_DETAIL_TYPES_IMPL_HPP_
+#define _ONEMATH_DETAIL_TYPES_IMPL_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -32,7 +32,7 @@
 #include <complex>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 
@@ -228,7 +228,7 @@ class dft_values {
 
 } // namespace detail
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_DETAIL_TYPES_IMPL_HPP_
+#endif //_ONEMATH_DETAIL_TYPES_IMPL_HPP_
diff --git a/include/oneapi/mkl/dft/forward.hpp b/include/oneapi/math/dft/forward.hpp
similarity index 98%
rename from include/oneapi/mkl/dft/forward.hpp
rename to include/oneapi/math/dft/forward.hpp
index 0eeecd497..297582423 100644
--- a/include/oneapi/mkl/dft/forward.hpp
+++ b/include/oneapi/math/dft/forward.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_FORWARD_HPP_
-#define _ONEMKL_DFT_FORWARD_HPP_
+#ifndef _ONEMATH_DFT_FORWARD_HPP_
+#define _ONEMATH_DFT_FORWARD_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -28,7 +28,7 @@
 
 #include "detail/types_impl.hpp"
 
-namespace oneapi::mkl::dft {
+namespace oneapi::math::dft {
 
 //Buffer version
 
@@ -156,6 +156,6 @@ sycl::event compute_forward(descriptor_type& desc, input_type* in_re, input_type
                                            reinterpret_cast<scalar_type*>(out_re),
                                            reinterpret_cast<scalar_type*>(out_im), dependencies);
 }
-} // namespace oneapi::mkl::dft
+} // namespace oneapi::math::dft
 
-#endif // _ONEMKL_DFT_FORWARD_HPP_
+#endif // _ONEMATH_DFT_FORWARD_HPP_
diff --git a/include/oneapi/mkl/dft/types.hpp b/include/oneapi/math/dft/types.hpp
similarity index 90%
rename from include/oneapi/mkl/dft/types.hpp
rename to include/oneapi/math/dft/types.hpp
index dfbcd3c28..c7529fe76 100644
--- a/include/oneapi/mkl/dft/types.hpp
+++ b/include/oneapi/math/dft/types.hpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_TYPES_HPP_
-#define _ONEMKL_DFT_TYPES_HPP_
+#ifndef _ONEMATH_DFT_TYPES_HPP_
+#define _ONEMATH_DFT_TYPES_HPP_
 
 #include "detail/types_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 
 /** The detail namespace is required since the MKLGPU backend uses identical 
@@ -36,7 +36,7 @@ using config_value = detail::config_value;
 using DFT_ERROR = detail::DFT_ERROR;
 
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_TYPES_HPP_
+#endif //_ONEMATH_DFT_TYPES_HPP_
diff --git a/include/oneapi/mkl/exceptions.hpp b/include/oneapi/math/exceptions.hpp
similarity index 58%
rename from include/oneapi/mkl/exceptions.hpp
rename to include/oneapi/math/exceptions.hpp
index 8047f7676..76f37b90f 100644
--- a/include/oneapi/mkl/exceptions.hpp
+++ b/include/oneapi/math/exceptions.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_EXCEPTIONS_HPP_
-#define _ONEMKL_EXCEPTIONS_HPP_
+#ifndef _ONEMATH_EXCEPTIONS_HPP_
+#define _ONEMATH_EXCEPTIONS_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -28,19 +28,20 @@
 #include <exception>
 #include <string>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
-// These are oneAPI oneMKL Specification exceptions
+// These are oneAPI oneMath Specification exceptions
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 class exception : public std::exception {
     std::string msg_;
 
 public:
+    exception(const std::string& message) : std::exception(), msg_(message) {}
     exception(const std::string& domain, const std::string& function, const std::string& info = "")
             : std::exception() {
-        msg_ = std::string("oneMKL: ") + domain +
+        msg_ = std::string("oneMath: ") + domain +
                ((domain.length() != 0 && function.length() != 0) ? "/" : "") + function +
                ((info.length() != 0)
                     ? (((domain.length() + function.length() != 0) ? ": " : "") + info)
@@ -52,79 +53,78 @@ class exception : public std::exception {
     }
 };
 
-class unsupported_device : public oneapi::mkl::exception {
+class unsupported_device : public oneapi::math::exception {
 public:
+    unsupported_device(const std::string& message) : exception(message) {}
     unsupported_device(const std::string& domain, const std::string& function,
                        const sycl::device& device)
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   domain, function,
                   device.get_info<sycl::info::device::name>() + " is not supported") {}
 };
 
-class host_bad_alloc : public oneapi::mkl::exception {
+class host_bad_alloc : public oneapi::math::exception {
 public:
+    host_bad_alloc(const std::string& message) : exception(message) {}
     host_bad_alloc(const std::string& domain, const std::string& function)
-            : oneapi::mkl::exception(domain, function, "cannot allocate memory on host") {}
+            : oneapi::math::exception(domain, function, "cannot allocate memory on host") {}
 };
 
-class device_bad_alloc : public oneapi::mkl::exception {
+class device_bad_alloc : public oneapi::math::exception {
 public:
+    device_bad_alloc(const std::string& message) : exception(message) {}
     device_bad_alloc(const std::string& domain, const std::string& function,
                      const sycl::device& device)
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   domain, function,
                   "cannot allocate memory on " + device.get_info<sycl::info::device::name>()) {}
 };
 
-class unimplemented : public oneapi::mkl::exception {
+class unimplemented : public oneapi::math::exception {
 public:
+    unimplemented(const std::string& message) : exception(message) {}
     unimplemented(const std::string& domain, const std::string& function,
                   const std::string& info = "")
-            : oneapi::mkl::exception(domain, function, "function is not implemented " + info) {}
+            : oneapi::math::exception(domain, function, "function is not implemented " + info) {}
 };
 
-class invalid_argument : public oneapi::mkl::exception {
+class invalid_argument : public oneapi::math::exception {
 public:
+    invalid_argument(const std::string& message) : exception(message) {}
     invalid_argument(const std::string& domain, const std::string& function,
                      const std::string& info = "")
-            : oneapi::mkl::exception(domain, function, "invalid argument " + info) {}
+            : oneapi::math::exception(domain, function, "invalid argument " + info) {}
 };
 
-class uninitialized : public oneapi::mkl::exception {
+class uninitialized : public oneapi::math::exception {
 public:
+    uninitialized(const std::string& message) : exception(message) {}
     uninitialized(const std::string& domain, const std::string& function,
                   const std::string& info = "")
-            : oneapi::mkl::exception(domain, function,
-                                     "handle/descriptor is not initialized " + info) {}
+            : oneapi::math::exception(domain, function,
+                                      "handle/descriptor is not initialized " + info) {}
 };
 
-class computation_error : public oneapi::mkl::exception {
+class computation_error : public oneapi::math::exception {
 public:
+    computation_error(const std::string& message) : exception(message) {}
     computation_error(const std::string& domain, const std::string& function,
                       const std::string& info = "")
-            : oneapi::mkl::exception(
+            : oneapi::math::exception(
                   domain, function,
                   "computation error" + ((info.length() != 0) ? (": " + info) : "")) {}
 };
 
-class batch_error : public oneapi::mkl::exception {
+class batch_error : public oneapi::math::exception {
 public:
+    batch_error(const std::string& message) : exception(message) {}
     batch_error(const std::string& domain, const std::string& function,
                 const std::string& info = "")
-            : oneapi::mkl::exception(domain, function,
-                                     "batch error" + ((info.length() != 0) ? (": " + info) : "")) {}
+            : oneapi::math::exception(
+                  domain, function, "batch error" + ((info.length() != 0) ? (": " + info) : "")) {}
 };
 
-class library_not_found : public oneapi::mkl::exception {
-public:
-    library_not_found(const std::string& domain, const std::string& function,
-                      const std::string& info = "")
-            : oneapi::mkl::exception(
-                  domain, function,
-                  "library not found" + ((info.length() != 0) ? (": " + info) : "")) {}
-};
-
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_EXCEPTIONS_HPP_
+#endif // _ONEMATH_EXCEPTIONS_HPP_
diff --git a/include/oneapi/math/lapack.hpp b/include/oneapi/math/lapack.hpp
new file mode 100644
index 000000000..c862513c4
--- /dev/null
+++ b/include/oneapi/math/lapack.hpp
@@ -0,0 +1,37 @@
+/*******************************************************************************
+* Copyright 2021-2022 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/math/detail/config.hpp"
+
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
+#include "oneapi/math/lapack/detail/mklcpu/lapack_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
+#include "oneapi/math/lapack/detail/mklgpu/lapack_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_CUSOLVER_BACKEND
+#include "oneapi/math/lapack/detail/cusolver/lapack_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_ROCSOLVER_BACKEND
+#include "oneapi/math/lapack/detail/rocsolver/lapack_ct.hpp"
+#endif
+
+#include "oneapi/math/lapack/detail/lapack_rt.hpp"
diff --git a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hpp b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hpp
similarity index 84%
rename from include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hpp
rename to include/oneapi/math/lapack/detail/cusolver/lapack_ct.hpp
index 1be0e5895..d0ed74581 100644
--- a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hpp
+++ b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hpp
@@ -28,13 +28,13 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 #define LAPACK_BACKEND cusolver
@@ -42,7 +42,7 @@ namespace lapack {
 #undef LAPACK_BACKEND
 
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif //_DETAIL_CUSOLVER_LAPACK_CT_HPP_
diff --git a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx
similarity index 69%
rename from include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx
rename to include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx
index 1ba7533c1..62998cae2 100644
--- a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx
+++ b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx
@@ -1,7 +1,5 @@
 /***************************************************************************
 *  Copyright (C) Codeplay Software Limited
-*  Copyright 2022 Intel Corporation
-*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
@@ -21,2633 +19,2625 @@
 
 // Buffer APIs
 
-static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e,
                          sycl::buffer<std::complex<float>>& tauq,
                          sycl::buffer<std::complex<float>>& taup,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+    oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
                                           scratchpad, scratchpad_size);
 }
-static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+    oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
                                           scratchpad, scratchpad_size);
 }
-static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+    oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
                                           scratchpad, scratchpad_size);
 }
-static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<std::complex<double>>& tauq,
                          sycl::buffer<std::complex<double>>& taup,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+    oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
                                           scratchpad, scratchpad_size);
 }
-static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void getrs(backend_selector<backend::rocsolver> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs(backend_selector<backend::cusolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+    oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
                                           ldb, scratchpad, scratchpad_size);
 }
-static inline void getrs(backend_selector<backend::rocsolver> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs(backend_selector<backend::cusolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+    oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
                                           ldb, scratchpad, scratchpad_size);
 }
-static inline void getrs(backend_selector<backend::rocsolver> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs(backend_selector<backend::cusolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+    oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
                                           ldb, scratchpad, scratchpad_size);
 }
-static inline void getrs(backend_selector<backend::rocsolver> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs(backend_selector<backend::cusolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+    oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
                                           ldb, scratchpad, scratchpad_size);
 }
-static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
                          sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt,
                          std::int64_t ldvt, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+    oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
-static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
                          sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt,
                          std::int64_t ldvt, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+    oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
-static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
                          std::int64_t ldu, sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+    oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
-static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
                          std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
                          std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+    oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
-static inline void heevd(backend_selector<backend::rocsolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void heevd(backend_selector<backend::cusolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                           scratchpad, scratchpad_size);
 }
-static inline void heevd(backend_selector<backend::rocsolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void heevd(backend_selector<backend::cusolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                           scratchpad, scratchpad_size);
 }
-static inline void hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+    oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
                                           ldb, w, scratchpad, scratchpad_size);
 }
-static inline void hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+    oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
                                           ldb, w, scratchpad, scratchpad_size);
 }
-static inline void hetrd(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void hetrd(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void hetrd(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void hetrd(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void hetrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void hetrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void hetrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void hetrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void orgbr(backend_selector<backend::rocsolver> selector, oneapi::mkl::generate vec,
+static inline void orgbr(backend_selector<backend::cusolver> selector, oneapi::math::generate vec,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& tau,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void orgbr(backend_selector<backend::rocsolver> selector, oneapi::mkl::generate vec,
+static inline void orgbr(backend_selector<backend::cusolver> selector, oneapi::math::generate vec,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& tau,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void orgtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void orgtr(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void orgtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void orgtr(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void ormtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+static inline void ormtr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+    oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
                                           tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+static inline void ormtr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+    oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
                                           tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormrq(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormrq(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormrq(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormrq(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormqr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormqr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormqr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormqr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potri(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potri(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potri(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potri(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                           scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+    oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
                                           scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+    oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
                                           scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+    oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
                                           scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+    oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
                                           scratchpad, scratchpad_size);
 }
-static inline void syevd(backend_selector<backend::rocsolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+static inline void syevd(backend_selector<backend::cusolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& w,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                           scratchpad, scratchpad_size);
 }
-static inline void syevd(backend_selector<backend::rocsolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+static inline void syevd(backend_selector<backend::cusolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                           scratchpad, scratchpad_size);
 }
-static inline void sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& w,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+    oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
                                           ldb, w, scratchpad, scratchpad_size);
 }
-static inline void sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
                          std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+    oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
                                           ldb, w, scratchpad, scratchpad_size);
 }
-static inline void sytrd(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrd(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void sytrd(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrd(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+    oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
                                           scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
                                           b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& b, std::int64_t ldb,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
                                           b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
                                           b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
                                           b, ldb, scratchpad, scratchpad_size);
 }
-static inline void ungbr(backend_selector<backend::rocsolver> selector, oneapi::mkl::generate vec,
+static inline void ungbr(backend_selector<backend::cusolver> selector, oneapi::math::generate vec,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void ungbr(backend_selector<backend::rocsolver> selector, oneapi::mkl::generate vec,
+static inline void ungbr(backend_selector<backend::cusolver> selector, oneapi::math::generate vec,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                           scratchpad, scratchpad_size);
 }
-static inline void ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void ungtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void ungtr(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void ungtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::uplo uplo,
+static inline void ungtr(backend_selector<backend::cusolver> selector, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+    oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
                                           scratchpad_size);
 }
-static inline void unmrq(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmrq(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmrq(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmrq(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmqr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmqr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmqr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmqr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+    oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
                                           c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+static inline void unmtr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+    oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
                                           tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmtr(backend_selector<backend::rocsolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+static inline void unmtr(backend_selector<backend::cusolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+    oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
                                           tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<float>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+    oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
                                                 stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<double>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+    oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
                                                 stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+    oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
                                                 stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+    oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
                                                 stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline void getrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+    oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
                                                 stride_ipiv, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
                                                 tau, stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
                                                 tau, stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void potrf_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+static inline void potrf_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+static inline void potrf_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
                                                 batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline void potrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                 stride_a, b, ldb, stride_b, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void potrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline void potrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                 stride_a, b, ldb, stride_b, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void potrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline void potrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                 stride_a, b, ldb, stride_b, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void potrs_batch(backend_selector<backend::rocsolver> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline void potrs_batch(backend_selector<backend::cusolver> selector,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                 stride_a, b, ldb, stride_b, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
                                                 tau, stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
-static inline void ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline void ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
+    oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
                                                 tau, stride_tau, batch_size, scratchpad,
                                                 scratchpad_size);
 }
 
 // USM APIs
 
-static inline sycl::event gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d,
                                 float* e, std::complex<float>* tauq, std::complex<float>* taup,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+    return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
                                                  taup, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* d, double* e,
                                 double* tauq, double* taup, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+    return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
                                                  taup, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* d, float* e,
                                 float* tauq, float* taup, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+    return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
                                                  taup, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 double* d, double* e, std::complex<double>* tauq,
                                 std::complex<double>* taup, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+    return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
                                                  taup, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+    return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* b, std::int64_t ldb,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
                                                  b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
                                                  b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
                                                  b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* b, std::int64_t ldb,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
                                                  b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gesvd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
-                                std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                double* s, double* u, std::int64_t ldu, double* vt,
+                                std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+    return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
                                                  u, ldu, vt, ldvt, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event gesvd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
-                                std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+    return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
                                                  u, ldu, vt, ldvt, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event gesvd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, float* s, std::complex<float>* u,
+                                std::int64_t ldu, std::complex<float>* vt, std::int64_t ldvt,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+    return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
                                                  u, ldu, vt, ldvt, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event gesvd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* s, std::complex<double>* u, std::int64_t ldu,
-                                std::complex<double>* vt, std::int64_t ldvt,
+static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, double* s, std::complex<double>* u,
+                                std::int64_t ldu, std::complex<double>* vt, std::int64_t ldvt,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+    return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
                                                  u, ldu, vt, ldvt, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event heevd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event heevd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, float* w,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    return oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event heevd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event heevd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, double* w,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    return oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                                 std::int64_t ldb, float* w, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
                                                  b, ldb, w, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                                 std::int64_t ldb, double* w, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
                                                  b, ldb, w, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event hetrd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event hetrd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, float* d, float* e, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    return oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hetrd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event hetrd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, double* d, double* e, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    return oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hetrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event hetrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hetrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event hetrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgbr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+static inline sycl::event orgbr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgbr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+static inline sycl::event orgbr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event orgtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event orgtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* tau, double* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+    return oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ormtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 double* a, std::int64_t lda, double* tau, double* c,
                                 std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+    return oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ormrq(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event ormrq(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ormrq(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event ormrq(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ormqr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event ormqr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ormqr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event ormqr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event potri(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event potri(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event potri(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event potri(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+    return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
                                                  scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+    return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
                                                  ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+    return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
                                                  ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                                 std::int64_t ldb, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+    return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
                                                  ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                                 std::int64_t ldb, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+    return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
                                                  ldb, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event syevd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event syevd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* w, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    return oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event syevd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event syevd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* w, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+    return oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
                                                  b, ldb, w, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
                                                  b, ldb, w, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event sytrd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* d, double* e, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event sytrd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* d, double* e, double* tau,
+                                double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    return oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrd(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event sytrd(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* d, float* e, float* tau, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+    return oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
+static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                                 std::int64_t ldb, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+    return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+    return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+    return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                                 std::int64_t ldb, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+    return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event ungbr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+static inline sycl::event ungbr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungbr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+static inline sycl::event ungbr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event ungtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event ungtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
                                                  scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmrq(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event unmrq(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event unmrq(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event unmrq(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event unmqr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event unmqr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event unmqr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline sycl::event unmqr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
                                                  tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event unmtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+    return oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event unmtr(backend_selector<backend::rocsolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmtr(backend_selector<backend::cusolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+    return oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
                                                  dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, float* a, std::int64_t lda,
                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        tau, stride_tau, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, double* a, std::int64_t lda,
                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        tau, stride_tau, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<float>* tau,
                                       std::int64_t stride_tau, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        tau, stride_tau, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<double>* tau,
                                       std::int64_t stride_tau, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        tau, stride_tau, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, float** a,
-                                      std::int64_t* lda, float** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, float** a, std::int64_t* lda, float** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, double** a,
-                                      std::int64_t* lda, double** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, double** a, std::int64_t* lda, double** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
+static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+    return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, float* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, double* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, float** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, float* scratchpad,
+                                      std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, double** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, double* scratchpad,
+                                      std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
+static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                       float* a, std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t* ipiv, std::int64_t stride_ipiv,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                       double* a, std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t* ipiv, std::int64_t stride_ipiv,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
                                                        ipiv, stride_ipiv, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* n, float** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
+                                      float** a, std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* n, double** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
+                                      double** a, std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
+                                      std::complex<float>** a, std::int64_t* lda,
                                       std::int64_t** ipiv, std::int64_t group_count,
                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
+                                      std::complex<double>** a, std::int64_t* lda,
                                       std::int64_t** ipiv, std::int64_t group_count,
                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+    return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, float* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(
+    return oneapi::math::lapack::cusolver::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, double* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(
+    return oneapi::math::lapack::cusolver::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(
-    backend_selector<backend::rocsolver> selector, oneapi::mkl::transpose trans, std::int64_t n,
+    backend_selector<backend::cusolver> selector, oneapi::math::transpose trans, std::int64_t n,
     std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
     std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size, std::complex<float>* scratchpad,
     std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(
+    return oneapi::math::lapack::cusolver::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(
-    backend_selector<backend::rocsolver> selector, oneapi::mkl::transpose trans, std::int64_t n,
+    backend_selector<backend::cusolver> selector, oneapi::math::transpose trans, std::int64_t n,
     std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
     std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size, std::complex<double>* scratchpad,
     std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(
+    return oneapi::math::lapack::cusolver::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, float** a, std::int64_t* lda,
                                       std::int64_t** ipiv, float** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                        ipiv, b, ldb, group_count, group_sizes,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, double** a, std::int64_t* lda,
                                       std::int64_t** ipiv, double** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                        ipiv, b, ldb, group_count, group_sizes,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, std::complex<float>** a,
                                       std::int64_t* lda, std::int64_t** ipiv,
                                       std::complex<float>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                        ipiv, b, ldb, group_count, group_sizes,
                                                        scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(
-    backend_selector<backend::rocsolver> selector, oneapi::mkl::transpose* trans, std::int64_t* n,
+    backend_selector<backend::cusolver> selector, oneapi::math::transpose* trans, std::int64_t* n,
     std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
     std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
     std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
     const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
                                                        ipiv, b, ldb, group_count, group_sizes,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
                                                        stride_a, tau, stride_tau, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
                                                        stride_a, tau, stride_tau, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a,
+static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, float** a,
                                       std::int64_t* lda, float** tau, std::int64_t group_count,
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a,
+static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, double** a,
                                       std::int64_t* lda, double** tau, std::int64_t group_count,
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, float* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        stride_a, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, double* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        stride_a, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        stride_a, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        stride_a, batch_size, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, float** a,
                                       std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, double** a,
                                       std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<float>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<double>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+    return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       float* a, std::int64_t lda, std::int64_t stride_a, float* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        stride_a, b, ldb, stride_b, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       double* a, std::int64_t lda, std::int64_t stride_a, double* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        stride_a, b, ldb, stride_b, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<float>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        stride_a, b, ldb, stride_b, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<double>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        stride_a, b, ldb, stride_b, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        b, ldb, group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        b, ldb, group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       std::complex<float>** a, std::int64_t* lda,
                                       std::complex<float>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        b, ldb, group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       std::complex<double>** a, std::int64_t* lda,
                                       std::complex<double>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+    return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
                                                        b, ldb, group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::int64_t k, std::complex<float>* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::complex<float>* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
                                                        stride_a, tau, stride_tau, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
                                       std::int64_t n, std::int64_t k, std::complex<double>* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::complex<double>* tau, std::int64_t stride_tau,
                                       std::int64_t batch_size, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
+    return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
                                                        stride_a, tau, stride_tau, batch_size,
                                                        scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
+static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<float>** a,
+                                      std::int64_t* lda, std::complex<float>** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
-static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
+static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
+                                      std::int64_t* n, std::int64_t* k, std::complex<double>** a,
+                                      std::int64_t* lda, std::complex<double>** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
+    return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
                                                        group_count, group_sizes, scratchpad,
                                                        scratchpad_size, dependencies);
 }
 
 // SCRATCHPAD APIs
 template <typename fp_type>
-std::int64_t gebrd_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t gebrd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::gebrd_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, lda);
 }
 template <typename fp_type>
-std::int64_t gerqf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t gerqf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::gerqf_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, lda);
 }
 template <typename fp_type>
-std::int64_t geqrf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t geqrf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::geqrf_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, lda);
 }
 template <typename fp_type>
-std::int64_t gesvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+std::int64_t gesvd_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                    std::int64_t m, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldu, std::int64_t ldvt) {
-    return oneapi::mkl::lapack::rocsolver::gesvd_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::gesvd_scratchpad_size<fp_type>(
         selector.get_queue(), jobu, jobvt, m, n, lda, ldu, ldvt);
 }
 template <typename fp_type>
-std::int64_t getrf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t getrf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::getrf_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, lda);
 }
 template <typename fp_type>
-std::int64_t getri_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t n,
+std::int64_t getri_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t n,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::getri_scratchpad_size<fp_type>(selector.get_queue(), n,
+    return oneapi::math::lapack::cusolver::getri_scratchpad_size<fp_type>(selector.get_queue(), n,
                                                                           lda);
 }
 template <typename fp_type>
-std::int64_t getrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+std::int64_t getrs_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::getrs_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           trans, n, nrhs, lda, ldb);
 }
 template <typename fp_type>
-std::int64_t heevd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t heevd_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::heevd_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::heevd_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           jobz, uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t hegvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb) {
-    return oneapi::mkl::lapack::rocsolver::hegvd_scratchpad_size<fp_type>(
+std::int64_t hegvd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldb) {
+    return oneapi::math::lapack::cusolver::hegvd_scratchpad_size<fp_type>(
         selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
 }
 template <typename fp_type>
-std::int64_t hetrd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::hetrd_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t hetrd_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::hetrd_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t hetrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::hetrf_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t hetrf_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::hetrf_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t orgbr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
+std::int64_t orgbr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::orgbr_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::orgbr_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           vect, m, n, k, lda);
 }
 template <typename fp_type>
-std::int64_t orgtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::orgtr_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t orgtr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::orgtr_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t orgqr_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t orgqr_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::orgqr_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, k, lda);
 }
 template <typename fp_type>
-std::int64_t ormrq_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormrq_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::ormrq_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::ormrq_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t ormqr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormqr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::ormqr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::ormqr_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t ormtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+std::int64_t ormtr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::ormtr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::ormtr_scratchpad_size<fp_type>(
         selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t potrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t potrf_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::potrf_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t potrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+std::int64_t potrs_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::potrs_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, nrhs, lda, ldb);
 }
 template <typename fp_type>
-std::int64_t potri_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::potri_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t potri_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::potri_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t sytrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t sytrf_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::sytrf_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t syevd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t syevd_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::syevd_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::syevd_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           jobz, uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t sygvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb) {
-    return oneapi::mkl::lapack::rocsolver::sygvd_scratchpad_size<fp_type>(
+std::int64_t sygvd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t itype,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldb) {
+    return oneapi::math::lapack::cusolver::sygvd_scratchpad_size<fp_type>(
         selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
 }
 template <typename fp_type>
-std::int64_t sytrd_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::sytrd_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t sytrd_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::sytrd_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t trtrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+std::int64_t trtrs_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::rocsolver::trtrs_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::trtrs_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, trans, diag, n, nrhs, lda, ldb);
 }
 template <typename fp_type>
-std::int64_t ungbr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
+std::int64_t ungbr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::ungbr_scratchpad_size<fp_type>(selector.get_queue(),
+    return oneapi::math::lapack::cusolver::ungbr_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           vect, m, n, k, lda);
 }
 template <typename fp_type>
-std::int64_t ungqr_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+std::int64_t ungqr_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_scratchpad_size<fp_type>(selector.get_queue(), m,
+    return oneapi::math::lapack::cusolver::ungqr_scratchpad_size<fp_type>(selector.get_queue(), m,
                                                                           n, k, lda);
 }
 template <typename fp_type>
-std::int64_t ungtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::rocsolver::ungtr_scratchpad_size<fp_type>(selector.get_queue(),
+std::int64_t ungtr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::cusolver::ungtr_scratchpad_size<fp_type>(selector.get_queue(),
                                                                           uplo, n, lda);
 }
 template <typename fp_type>
-std::int64_t unmrq_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t unmrq_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::unmrq_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::unmrq_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t unmqr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t unmqr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::unmqr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::unmqr_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t unmtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+std::int64_t unmtr_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::rocsolver::unmtr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::unmtr_scratchpad_size<fp_type>(
         selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
 }
 template <typename fp_type>
-std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_ipiv,
                                          std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <typename fp_type>
-std::int64_t getri_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t getri_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t n, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_ipiv, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<fp_type>(
         selector.get_queue(), n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <typename fp_type>
-std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::transpose trans, std::int64_t n,
+std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::transpose trans, std::int64_t n,
                                          std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_ipiv, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b,
         batch_size);
 }
 template <typename fp_type>
-std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_tau,
                                          std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, stride_a, stride_tau, batch_size);
 }
 template <typename fp_type>
-std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, lda, stride_a, batch_size);
 }
 template <typename fp_type>
-std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                          std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 }
 template <typename fp_type>
-std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t k,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <typename fp_type>
-std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t k,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <typename fp_type>
-std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t getri_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t getri_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<fp_type>(
         selector.get_queue(), n, lda, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::transpose* trans, std::int64_t* n,
+std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::transpose* trans, std::int64_t* n,
                                          std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, lda, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
                                          std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <typename fp_type>
-std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, group_count, group_sizes);
 }
diff --git a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp
similarity index 80%
rename from include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp
rename to include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp
index 6d31a05af..40192b082 100644
--- a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp
+++ b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp
@@ -16,8 +16,8 @@
 *  limitations under the License.
 *
 **************************************************************************/
-#ifndef _ONEMKL_LAPACK_CUSOLVER_HPP_
-#define _ONEMKL_LAPACK_CUSOLVER_HPP_
+#ifndef _ONEMATH_LAPACK_CUSOLVER_HPP_
+#define _ONEMATH_LAPACK_CUSOLVER_HPP_
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
 #else
@@ -27,19 +27,19 @@
 #include <cstdint>
 #include <string>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
-#include "onemkl_lapack_cusolver.hxx"
+#include "onemath_lapack_cusolver.hxx"
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_LAPACK_CUSOLVER_HPP_
+#endif //_ONEMATH_LAPACK_CUSOLVER_HPP_
diff --git a/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx
new file mode 100644
index 000000000..a3d4de61c
--- /dev/null
+++ b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx
@@ -0,0 +1,1854 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+// Buffer APIs
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tauq,
+                          sycl::buffer<std::complex<float>>& taup,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tauq,
+                          sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tauq,
+                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tauq,
+                          sycl::buffer<std::complex<double>>& taup,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
+                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
+                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b,
+                          std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
+                          sycl::buffer<double>& vt, std::int64_t ldvt,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
+                          sycl::buffer<float>& vt, std::int64_t ldvt,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<float>& s,
+                          sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<double>& s,
+                          sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmrq (buffer API): complex float/double overloads taking side, trans, m, n, k, a, tau and c.
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmqr (buffer API): complex float/double overloads; same parameter list as unmrq above.
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmtr (buffer API): complex float/double overloads; adds uplo and has no k, unlike unmqr.
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// geqrf_batch (buffer API): float/double, real/complex; takes stride_a, stride_tau, batch_size.
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getri_batch (buffer API): float/double, real/complex; ipiv is a sycl::buffer<std::int64_t>.
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getrs_batch (buffer API): float/double, real/complex; a, ipiv and b each have a stride_* arg.
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getrf_batch (buffer API): float/double, real/complex; takes m, n and ipiv with stride_ipiv.
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// orgqr_batch (buffer API): float and double only; ungqr_batch below mirrors it for complex.
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+// potrf_batch (buffer API): float/double, real/complex; no ipiv or tau, only a and scratchpad.
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// potrs_batch (buffer API): float/double, real/complex; takes nrhs plus a and b with strides.
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// ungqr_batch (buffer API): complex float/double; same parameter list as orgqr_batch above.
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+// USM (unified shared memory) APIs: pointer-based counterparts of the buffer overloads above
+// gebrd (USM): d and e stay real (float/double) even when a, tauq, taup are std::complex.
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tauq, std::complex<float>* taup,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* d, double* e, double* tauq, double* taup,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* d, float* e, float* tauq, float* taup,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tauq, std::complex<double>* taup,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// gerqf (USM): float/double, real/complex; a, tau and scratchpad share one element type.
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// geqrf (USM): float/double, real/complex; same parameter list as gerqf above.
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getrf (USM): float/double, real/complex; ipiv is std::int64_t* for every element type.
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getri (USM): float/double, real/complex; takes only n (no m) along with a, lda and ipiv.
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
+                                 std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
+                                 std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getrs (USM): float/double, real/complex; takes trans, n, nrhs, then a, ipiv and b.
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// gesvd (USM): s stays real (float/double) in the complex overloads; u and vt match a's type.
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 double* a, std::int64_t lda, double* s, double* u,
+                                 std::int64_t ldu, double* vt, std::int64_t ldvt,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu,
+                                 float* vt, std::int64_t ldvt, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* s,
+                                 std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
+                                 std::int64_t ldvt, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* s,
+                                 std::complex<double>* u, std::int64_t ldu,
+                                 std::complex<double>* vt, std::int64_t ldvt,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// heevd (USM): complex float/double overloads only; w is real (float* / double*).
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hegvd (USM): complex float/double overloads only; heevd's parameters plus itype, b and ldb.
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hetrd (USM): complex float/double overloads only; d and e are real while tau is complex.
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hetrf (USM): complex float/double overloads only; ipiv is std::int64_t*.
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// orgbr (USM): float and double overloads; takes generate vec, then m, n, k, a, lda and tau.
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
+                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                 float* tau, float* c, std::int64_t ldc, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                 double* tau, double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, float* b,
+                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, double* b,
+                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* w, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* w, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* b, std::int64_t ldb, float* w,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* d, double* e, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* d, float* e, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
+                                 std::int64_t ldc, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* c, std::int64_t ldc,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, float* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t* ipiv, std::int64_t stride_ipiv,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getri_batch overloads that take group_count/group_sizes instead of strides and batch_size.
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrs_batch overloads that take per-operand strides and a single batch_size.
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrs_batch overloads that take group_count/group_sizes instead of strides and batch_size.
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, float** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, double** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::complex<float>** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::complex<double>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// orgqr_batch overloads (float, double) that take stride_a/stride_tau and a single batch_size.
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// orgqr_batch overloads (float, double) that take group_count/group_sizes instead of strides.
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrf_batch overloads that take stride_a and a single batch_size.
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       float* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrf_batch overloads that take group_count/group_sizes instead of stride_a and batch_size.
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, float** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, double** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrs_batch overloads that take stride_a/stride_b and a single batch_size.
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrs_batch overloads that take group_count/group_sizes instead of strides and batch_size.
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, float** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, double** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::complex<float>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::complex<double>** a, std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+// ungqr_batch overloads (complex float/double) that take stride_a/stride_tau and batch_size.
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// ungqr_batch overloads that take group_count/group_sizes instead of strides and batch_size.
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+// SCRATCHPAD APIs: <routine>_scratchpad_size function templates on T, returning std::int64_t.
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                  oneapi::math::jobsvd jobvt, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldu, std::int64_t ldvt);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
+                                                  std::int64_t n, std::int64_t nrhs,
+                                                  std::int64_t lda, std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                                  oneapi::math::uplo uplo, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
+                                                  oneapi::math::job jobz, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect,
+                                                  std::int64_t m, std::int64_t n, std::int64_t k,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t nrhs,
+                                                  std::int64_t lda, std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                                  oneapi::math::uplo uplo, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
+                                                  oneapi::math::job jobz, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans,
+                                                  oneapi::math::diag diag, std::int64_t n,
+                                                  std::int64_t nrhs, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect,
+                                                  std::int64_t m, std::int64_t n, std::int64_t k,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldc);
+// <routine>_batch_scratchpad_size overloads that take strides and a single batch_size.
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t stride_ipiv,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_ipiv,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        std::int64_t n, std::int64_t nrhs,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t ldb, std::int64_t stride_b,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t k,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t k,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+// <routine>_batch_scratchpad_size overloads that take group_count/group_sizes.
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* lda,
+                                                        std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* lda,
+                                                        std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* k,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue,
+                                                        oneapi::math::uplo* uplo, std::int64_t* n,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue,
+                                                        oneapi::math::uplo* uplo, std::int64_t* n,
+                                                        std::int64_t* nrhs, std::int64_t* lda,
+                                                        std::int64_t* ldb, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* k,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
diff --git a/include/oneapi/math/lapack/detail/lapack_loader.hpp b/include/oneapi/math/lapack/detail/lapack_loader.hpp
new file mode 100644
index 000000000..be5015257
--- /dev/null
+++ b/include/oneapi/math/lapack/detail/lapack_loader.hpp
@@ -0,0 +1,2394 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#pragma once
+
+#include <complex>
+#include <cstdint>
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
+
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
+
+namespace oneapi {
+namespace math {
+namespace lapack {
+namespace detail {
+
+ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tauq,
+                          sycl::buffer<std::complex<float>>& taup,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gebrd: std::complex<float> a/tauq/taup/scratchpad, real float d/e
+ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // gebrd: double
+ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
+                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // gebrd: float
+ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tauq,
+                          sycl::buffer<std::complex<double>>& taup,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gebrd: std::complex<double> a/tauq/taup/scratchpad, real double d/e
+ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // gerqf: float a/tau/scratchpad
+ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // gerqf: double
+ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gerqf: std::complex<float>
+ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gerqf: std::complex<double>
+ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // geqrf: std::complex<float> a/tau/scratchpad
+ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // geqrf: double
+ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // geqrf: float
+ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // geqrf: std::complex<double>
+ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrf: std::complex<float> a/scratchpad; ipiv is std::int64_t in every overload
+ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrf: double
+ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrf: float
+ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrf: std::complex<double>
+ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getri: std::complex<float> a/scratchpad; ipiv is std::int64_t in every overload
+ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // getri: double
+ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // getri: float
+ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getri: std::complex<double>
+ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
+                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrs: std::complex<float> a/b/scratchpad; ipiv is std::int64_t in every overload
+ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
+                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrs: double
+ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b,
+                          std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrs: float
+ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
+                          std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // getrs: std::complex<double>
+ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
+                          sycl::buffer<double>& vt, std::int64_t ldvt,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // gesvd: double
+ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
+                          sycl::buffer<float>& vt, std::int64_t ldvt,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // gesvd: float
+ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
+                          std::int64_t ldu, sycl::buffer<std::complex<float>>& vt,
+                          std::int64_t ldvt, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gesvd: std::complex<float> a/u/vt/scratchpad, real float s
+ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
+                          std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
+                          std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // gesvd: std::complex<double> a/u/vt/scratchpad, real double s
+ONEMATH_EXPORT void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // heevd: std::complex<float> a/scratchpad, real float w
+ONEMATH_EXPORT void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // heevd: std::complex<double> a/scratchpad, real double w
+ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                          oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hegvd: std::complex<float> a/b/scratchpad, real float w
+ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                          oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hegvd: std::complex<double> a/b/scratchpad, real double w
+ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hetrd: std::complex<float> a/tau/scratchpad, real float d/e
+ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hetrd: std::complex<double> a/tau/scratchpad, real double d/e
+ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hetrf: std::complex<float> a/scratchpad, std::int64_t ipiv
+ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // hetrf: std::complex<double> a/scratchpad, std::int64_t ipiv
+ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgbr: float (real types only in this overload set)
+ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgbr: double
+ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgqr: double (real types only in this overload set)
+ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgqr: float
+ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgtr: float (real types only in this overload set)
+ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // orgtr: double
+ONEMATH_EXPORT void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // ormtr: float
+ONEMATH_EXPORT void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // ormtr: double
+ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // ormrq: float
+ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // ormrq: double
+ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // ormqr: double
+ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // ormqr: float
+ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // potrf: float
+ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // potrf: double
+ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potrf: std::complex<float>
+ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potrf: std::complex<double>
+ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // potri: float
+ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // potri: double
+ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potri: std::complex<float>
+ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potri: std::complex<double>
+ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // potrs: float
+ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // potrs: double
+ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potrs: std::complex<float>
+ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // potrs: std::complex<double>
+ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<double>& w,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // syevd: double (real types only in this overload set)
+ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // syevd: float
+ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                          oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
+                          std::int64_t ldb, sycl::buffer<double>& w,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // sygvd: double (real types only in this overload set)
+ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                          oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
+                          std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // sygvd: float
+ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // sytrd: double (real types only in this overload set)
+ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // sytrd: float
+ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // sytrf: float a/scratchpad; ipiv is std::int64_t in every overload
+ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // sytrf: double
+ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // sytrf: std::complex<float>
+ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // sytrf: std::complex<double>
+ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // trtrs: std::complex<float> a/b/scratchpad
+ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // trtrs: double
+ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // trtrs: float
+ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // trtrs: std::complex<double>
+ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // ungbr: std::complex<float> (complex types only in this overload set)
+ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue& queue,
+                          oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // ungbr: std::complex<double>
+ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<float>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<double>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
+                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
+                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
+                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
+                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
+                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d,
+                                 float* e, std::complex<float>* tauq, std::complex<float>* taup,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, double* a, std::int64_t lda, double* d, double* e,
+                                 double* tauq, double* taup, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, float* a, std::int64_t lda, float* d, float* e,
+                                 float* tauq, float* taup, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                 double* d, double* e, std::complex<double>* tauq,
+                                 std::complex<double>* taup, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
+                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
+                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,  // NOTE(review): presumably LAPACK gesvd (SVD); confirm
+                                 std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                 double* s, double* u, std::int64_t ldu, double* vt,
+                                 std::int64_t ldvt, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                 std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                 float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload; s is float*
+                                 oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                 std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, float* s, std::complex<float>* u,
+                                 std::int64_t ldu, std::complex<float>* vt, std::int64_t ldvt,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload; s is double*
+                                 oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                 std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, double* s, std::complex<double>* u,
+                                 std::int64_t ldu, std::complex<double>* vt, std::int64_t ldvt,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload; w is float*
+                                 oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,  // NOTE(review): presumably LAPACK heevd (Hermitian eigensolver); confirm
+                                 std::complex<float>* a, std::int64_t lda, float* w,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload; w is double*
+                                 oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* w,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload; w is float*
+                                 std::int64_t itype, oneapi::math::job jobz,  // NOTE(review): presumably LAPACK hegvd (generalized Hermitian eigenproblem); confirm
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload; w is double*
+                                 std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload; d and e are float*
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,  // NOTE(review): presumably LAPACK hetrd (Hermitian tridiagonal reduction); confirm
+                                 std::int64_t lda, float* d, float* e, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload; d and e are double*
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, double* d, double* e, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload; ipiv is std::int64_t*
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,  // NOTE(review): presumably LAPACK hetrf (Hermitian indefinite factorization); confirm
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload; ipiv is std::int64_t*
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::generate vec, std::int64_t m, std::int64_t n,  // NOTE(review): presumably LAPACK orgbr (builds Q or P^T after gebrd); confirm
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,  // double overload
+                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,  // NOTE(review): presumably LAPACK orgqr (builds Q after geqrf); confirm
+                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,  // float overload
+                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,  // NOTE(review): presumably LAPACK orgtr (builds Q after sytrd); confirm
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::side side, oneapi::math::uplo uplo,  // NOTE(review): presumably LAPACK ormtr (applies Q from sytrd to c); confirm
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::side side, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 double* a, std::int64_t lda, double* tau, double* c,
+                                 std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,  // NOTE(review): presumably LAPACK ormrq (applies Q from gerqf to c); confirm
+                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
+                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
+                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,  // NOTE(review): presumably LAPACK ormqr (applies Q from geqrf to c); confirm
+                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
+                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
+                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,  // NOTE(review): presumably LAPACK potrf (Cholesky factorization); confirm
+                                 std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,  // NOTE(review): presumably LAPACK potri (inverse from Cholesky factor); confirm
+                                 std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,  // NOTE(review): presumably LAPACK potrs (solve with Cholesky factor); confirm
+                                 float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                                 std::int64_t ldb, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
+                                 std::int64_t ldb, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,  // NOTE(review): presumably LAPACK syevd (symmetric eigensolver); confirm
+                                 double* a, std::int64_t lda, double* w, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* w, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 std::int64_t itype, oneapi::math::job jobz,  // NOTE(review): presumably LAPACK sygvd (generalized symmetric eigenproblem); confirm
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* b, std::int64_t ldb, float* w,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,  // NOTE(review): presumably LAPACK sytrd (symmetric tridiagonal reduction); confirm
+                                 std::int64_t lda, double* d, double* e, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* d, float* e, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue,  // float overload; ipiv is std::int64_t*
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,  // NOTE(review): presumably LAPACK sytrf (symmetric indefinite factorization); confirm
+                                 std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,  // NOTE(review): presumably LAPACK trtrs (triangular solve); confirm
+                                 oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                                 std::int64_t ldb, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue,  // double overload
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue,  // float overload
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                 float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
+                                 std::int64_t ldb, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::generate vec, std::int64_t m, std::int64_t n,  // NOTE(review): presumably LAPACK ungbr (builds unitary Q or P^H after gebrd); confirm
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,  // std::complex<float> overload
+                                 std::int64_t n, std::int64_t k, std::complex<float>* a,  // NOTE(review): presumably LAPACK ungqr (builds unitary Q after geqrf); confirm
+                                 std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,  // std::complex<double> overload
+                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,  // NOTE(review): presumably LAPACK ungtr (builds unitary Q after hetrd); confirm
+                                 std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,  // NOTE(review): presumably LAPACK unmrq (applies unitary Q from gerqf to c); confirm
+                                 std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,  // NOTE(review): presumably LAPACK unmqr (applies unitary Q from geqrf to c); confirm
+                                 std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::side side, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload
+                                 oneapi::math::side side, oneapi::math::uplo uplo,  // NOTE(review): presumably LAPACK unmtr (applies unitary Q from hetrd to c); confirm
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload
+                                 oneapi::math::side side, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // float overload taking stride_a/stride_tau/batch_size
+                                       std::int64_t m, std::int64_t n, float* a, std::int64_t lda,  // NOTE(review): presumably batched LAPACK geqrf (QR factorization); confirm
+                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});  // dependencies defaults to empty
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // double overload taking stride_a/stride_tau/batch_size
+                                       std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload taking strides
+                                       std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a,
+                                       std::complex<float>* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload taking strides
+                                       std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a,
+                                       std::complex<double>* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // float overload taking group_count/group_sizes
+                                       std::int64_t* m, std::int64_t* n, float** a,  // m, n, lda are pointers here; a and tau are float**
+                                       std::int64_t* lda, float** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // double overload taking group_count/group_sizes
+                                       std::int64_t* m, std::int64_t* n, double** a,
+                                       std::int64_t* lda, double** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<float> overload taking group_count/group_sizes
+                                       std::int64_t* m, std::int64_t* n, std::complex<float>** a,
+                                       std::int64_t* lda, std::complex<float>** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue,  // std::complex<double> overload taking group_count/group_sizes
+                                       std::int64_t* m, std::int64_t* n, std::complex<double>** a,
+                                       std::int64_t* lda, std::complex<double>** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::complex<double>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t n, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t n, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* n, float** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* n, double** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::transpose trans, std::int64_t n,
+                                       std::int64_t nrhs, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::transpose trans, std::int64_t n,
+                                       std::int64_t nrhs, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::transpose* trans, std::int64_t* n,
+                                       std::int64_t* nrhs, float** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, float** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::transpose* trans, std::int64_t* n,
+                                       std::int64_t* nrhs, double** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, double** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a, std::int64_t* lda,
+    std::int64_t** ipiv, std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda,
+    std::int64_t** ipiv, std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, std::int64_t k, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, float* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t m, std::int64_t n, std::int64_t k, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, double* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a,
+                                       std::int64_t* lda, float** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                       double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                       std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n, float** a,
+                                       std::int64_t* lda, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n, double** a,
+                                       std::int64_t* lda, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                       float* a, std::int64_t lda, std::int64_t stride_a, float* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       double* b, std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::int64_t* nrhs, double** a, std::int64_t* lda,
+                                       double** b, std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::complex<float>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       oneapi::math::uplo* uplo, std::int64_t* n,
+                                       std::int64_t* nrhs, std::complex<double>** a,
+                                       std::int64_t* lda, std::complex<double>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::complex<float>* a, std::int64_t lda, std::int64_t stride_a, std::complex<float>* tau,
+    std::int64_t stride_tau, std::int64_t batch_size, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::complex<double>* a, std::int64_t lda, std::int64_t stride_a, std::complex<double>* tau,
+    std::int64_t stride_tau, std::int64_t batch_size, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue,
+                                       std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                   std::int64_t m, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldu, std::int64_t ldvt);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                   std::int64_t m, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldu, std::int64_t ldvt);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t heevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hegvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hetrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hetrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldc);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldc);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t m, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_ipiv,
+                                         std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_ipiv, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::transpose trans, std::int64_t n,
+                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_ipiv, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t m, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_tau,
+                                         std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                         std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t m, std::int64_t n, std::int64_t k,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t m, std::int64_t n, std::int64_t k,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::transpose* trans, std::int64_t* n,
+                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
+                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::jobsvd jobu,
+                                                         oneapi::math::jobsvd jobvt, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda,
+                                                         std::int64_t ldu, std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu,
+    std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu,
+    std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu,
+    std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::transpose trans,
+                                                         std::int64_t n, std::int64_t nrhs,
+                                                         std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::transpose trans,
+                                                          std::int64_t n, std::int64_t nrhs,
+                                                          std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::generate vect,
+                                                         std::int64_t m, std::int64_t n,
+                                                         std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::generate vect,
+                                                          std::int64_t m, std::int64_t n,
+                                                          std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t k,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t k,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t nrhs, std::int64_t lda,
+                                                         std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t nrhs, std::int64_t lda,
+                                                          std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::job jobz,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, std::int64_t itype,
+                                                         oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue, std::int64_t itype,
+                                                          oneapi::math::job jobz,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size<float>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size<double>(oneapi::math::device libkey,
+                                                          sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                                       sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                                        sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
+    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
+    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
+    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
+    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(oneapi::math::device libkey,
+                                                               sycl::queue& queue, std::int64_t* m,
+                                                               std::int64_t* n, std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(oneapi::math::device libkey,
+                                                                sycl::queue& queue, std::int64_t* m,
+                                                                std::int64_t* n, std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<float>(oneapi::math::device libkey,
+                                                               sycl::queue& queue, std::int64_t* n,
+                                                               std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<double>(oneapi::math::device libkey,
+                                                                sycl::queue& queue, std::int64_t* n,
+                                                                std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::math::device libkey,
+                                                               sycl::queue& queue, std::int64_t* m,
+                                                               std::int64_t* n, std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::math::device libkey,
+                                                                sycl::queue& queue, std::int64_t* m,
+                                                                std::int64_t* n, std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
+    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
+} //namespace detail
+} //namespace lapack
+} //namespace math
+} //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/lapack_rt.hpp b/include/oneapi/math/lapack/detail/lapack_rt.hpp
similarity index 87%
rename from include/oneapi/mkl/lapack/detail/lapack_rt.hpp
rename to include/oneapi/math/lapack/detail/lapack_rt.hpp
index 5199a0ce5..a84331ceb 100644
--- a/include/oneapi/mkl/lapack/detail/lapack_rt.hpp
+++ b/include/oneapi/math/lapack/detail/lapack_rt.hpp
@@ -28,14 +28,14 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/lapack/exceptions.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
-#include "oneapi/mkl/lapack/detail/lapack_loader.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/lapack/exceptions.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
+#include "oneapi/math/lapack/detail/lapack_loader.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
@@ -168,7 +168,7 @@ static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::c
                          std::int64_t scratchpad_size) {
     detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
@@ -176,7 +176,7 @@ static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::
     detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
@@ -184,14 +184,14 @@ static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::
     detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
@@ -199,7 +199,7 @@ static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::
     detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                          std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
                          sycl::buffer<double>& vt, std::int64_t ldvt,
@@ -207,7 +207,7 @@ static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::m
     detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                   scratchpad, scratchpad_size);
 }
-static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                          std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
                          sycl::buffer<float>& vt, std::int64_t ldvt,
@@ -215,7 +215,7 @@ static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::m
     detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                   scratchpad, scratchpad_size);
 }
-static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
                          std::int64_t lda, sycl::buffer<float>& s,
                          sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
@@ -225,7 +225,7 @@ static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::m
     detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                   scratchpad, scratchpad_size);
 }
-static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
                          std::int64_t lda, sycl::buffer<double>& s,
                          sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
@@ -235,22 +235,22 @@ static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::m
     detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                   scratchpad, scratchpad_size);
 }
-static inline void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                   scratchpad_size);
 }
-static inline void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                   scratchpad_size);
 }
-static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
@@ -258,8 +258,8 @@ static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::jo
     detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                   scratchpad_size);
 }
-static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
@@ -267,7 +267,7 @@ static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::jo
     detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                   scratchpad_size);
 }
-static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e,
                          sycl::buffer<std::complex<float>>& tau,
@@ -276,7 +276,7 @@ static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_
     detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<std::complex<double>>& tau,
@@ -285,28 +285,28 @@ static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_
     detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
@@ -323,33 +323,33 @@ static inline void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                          sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
                          sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
                          std::int64_t ldc, sycl::buffer<float>& scratchpad,
@@ -357,7 +357,7 @@ static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
                          std::int64_t ldc, sycl::buffer<double>& scratchpad,
@@ -365,7 +365,7 @@ static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
                          std::int64_t ldc, sycl::buffer<double>& scratchpad,
@@ -373,7 +373,7 @@ static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
                          std::int64_t ldc, sycl::buffer<float>& scratchpad,
@@ -381,65 +381,65 @@ static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size);
 }
-static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& b, std::int64_t ldb,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
@@ -447,7 +447,7 @@ static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_
     detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
@@ -455,77 +455,77 @@ static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_
     detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                   scratchpad_size);
 }
-static inline void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                   scratchpad_size);
 }
-static inline void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                   scratchpad_size);
 }
-static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                   scratchpad_size);
 }
-static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                   scratchpad_size);
 }
-static inline void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
                          sycl::buffer<double>& e, sycl::buffer<double>& tau,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
                          sycl::buffer<float>& e, sycl::buffer<float>& tau,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
 }
-static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
@@ -533,24 +533,24 @@ static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl
     detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                   scratchpad, scratchpad_size);
 }
-static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                   scratchpad, scratchpad_size);
 }
-static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
                          std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                   scratchpad, scratchpad_size);
 }
-static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
@@ -558,7 +558,7 @@ static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl
     detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                   scratchpad, scratchpad_size);
 }
-static inline void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
@@ -566,7 +566,7 @@ static inline void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int
     detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad,
                   scratchpad_size);
 }
-static inline void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
@@ -588,21 +588,21 @@ static inline void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std
                          std::int64_t scratchpad_size) {
     detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
     detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
 }
-static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
@@ -612,7 +612,7 @@ static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
@@ -622,7 +622,7 @@ static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
@@ -632,7 +632,7 @@ static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+static inline void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                          std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
@@ -642,8 +642,8 @@ static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
@@ -652,8 +652,8 @@ static inline void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl
     detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                   scratchpad, scratchpad_size);
 }
-static inline void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
@@ -730,7 +730,7 @@ static inline void getri_batch(sycl::queue& queue, std::int64_t n,
     detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv,
                         batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
@@ -739,7 +739,7 @@ static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
     detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                         stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
@@ -748,7 +748,7 @@ static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
     detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                         stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
@@ -759,7 +759,7 @@ static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
     detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                         stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
@@ -820,21 +820,21 @@ static inline void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t
     detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau,
                         batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
     detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                         scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
     detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                         scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
@@ -842,7 +842,7 @@ static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::
     detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                         scratchpad, scratchpad_size);
 }
-static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
@@ -850,7 +850,7 @@ static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::
     detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                         scratchpad, scratchpad_size);
 }
-static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
                                std::int64_t stride_b, std::int64_t batch_size,
@@ -858,7 +858,7 @@ static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::
     detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                         stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
                                std::int64_t stride_b, std::int64_t batch_size,
@@ -866,7 +866,7 @@ static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::
     detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                         stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
@@ -876,7 +876,7 @@ static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::
     detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                         stride_b, batch_size, scratchpad, scratchpad_size);
 }
-static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                                std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
@@ -1047,7 +1047,7 @@ static inline sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex
     return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1055,7 +1055,7 @@ static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans
     return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                 std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
                                 double* b, std::int64_t ldb, double* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1063,7 +1063,7 @@ static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans
     return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                 std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
                                 float* b, std::int64_t ldb, float* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1071,7 +1071,7 @@ static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans
     return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1079,8 +1079,8 @@ static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans
     return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                 double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu,
                                 double* vt, std::int64_t ldvt, double* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1088,16 +1088,17 @@ static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
     return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                          ldvt, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt,
-                                std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu,
+                                float* vt, std::int64_t ldvt, float* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                          ldvt, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, float* s,
                                 std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
                                 std::int64_t ldvt, std::complex<float>* scratchpad,
@@ -1106,8 +1107,8 @@ static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
     return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                          ldvt, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, double* s,
                                 std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
                                 std::int64_t ldvt, std::complex<double>* scratchpad,
@@ -1116,14 +1117,14 @@ static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
     return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                          ldvt, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 double* w, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1131,8 +1132,8 @@ static inline sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneap
     return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                                 float* w, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1140,8 +1141,8 @@ static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::
     return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                                 double* w, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1149,7 +1150,7 @@ static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::
     return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1157,7 +1158,7 @@ static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std:
     return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1165,28 +1166,28 @@ static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std:
     return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
@@ -1207,139 +1208,141 @@ static inline sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t
     return detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                float* tau, float* c, std::int64_t ldc, float* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                double* tau, double* c, std::int64_t ldc, double* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
                                 std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
                                 std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size,
                          dependencies);
 }
-static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::int64_t nrhs, float* a, std::int64_t lda, float* b,
                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::int64_t nrhs, double* a, std::int64_t lda, double* b,
                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* b, std::int64_t ldb,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1347,7 +1350,7 @@ static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std:
     return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* b, std::int64_t ldb,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1355,80 +1358,80 @@ static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std:
     return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, double* a, std::int64_t lda, double* w,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+static inline sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, float* a, std::int64_t lda, float* w,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* w, double* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* b, std::int64_t ldb, float* w, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* d, double* e, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* d, float* e, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, oneapi::math::diag diag,
                                 std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1436,8 +1439,8 @@ static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
     return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, oneapi::math::diag diag,
                                 std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
                                 double* b, std::int64_t ldb, double* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1445,8 +1448,8 @@ static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
     return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, oneapi::math::diag diag,
                                 std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
                                 float* b, std::int64_t ldb, float* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1454,8 +1457,8 @@ static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
     return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, oneapi::math::diag diag,
                                 std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1463,7 +1466,7 @@ static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
     return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                 std::int64_t n, std::int64_t k, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1471,7 +1474,7 @@ static inline sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, s
     return detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+static inline sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1494,14 +1497,14 @@ static inline sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t
     return detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
@@ -1509,8 +1512,8 @@ static inline sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std:
     return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad,
                          scratchpad_size, dependencies);
 }
-static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1518,8 +1521,8 @@ static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
     return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
@@ -1528,8 +1531,8 @@ static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
     return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1537,8 +1540,8 @@ static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
     return detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+static inline sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
@@ -1547,21 +1550,22 @@ static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
     return detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+static inline sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
+                                std::int64_t ldc, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
+static inline sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* tau,
+                                std::complex<double>* c, std::int64_t ldc,
+                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
     return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
                          scratchpad, scratchpad_size, dependencies);
@@ -1770,7 +1774,7 @@ static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::
     return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
@@ -1781,7 +1785,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
                                stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t n, std::int64_t nrhs, double* a,
                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
@@ -1792,7 +1796,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
                                stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::complex<float>* b,
@@ -1804,7 +1808,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
                                stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::complex<double>* b,
@@ -1816,7 +1820,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
                                stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
                                       std::int64_t* n, std::int64_t* nrhs, float** a,
                                       std::int64_t* lda, std::int64_t** ipiv, float** b,
                                       std::int64_t* ldb, std::int64_t group_count,
@@ -1826,7 +1830,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
     return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
                                       std::int64_t* n, std::int64_t* nrhs, double** a,
                                       std::int64_t* lda, std::int64_t** ipiv, double** b,
                                       std::int64_t* ldb, std::int64_t group_count,
@@ -1836,7 +1840,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
     return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
                                       std::int64_t* lda, std::int64_t** ipiv,
                                       std::complex<float>** b, std::int64_t* ldb,
@@ -1846,7 +1850,7 @@ static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose
     return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
                                       std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a,
                                       std::int64_t* lda, std::int64_t** ipiv,
                                       std::complex<double>** b, std::int64_t* ldb,
@@ -1891,7 +1895,7 @@ static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::
     return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       float* a, std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
@@ -1899,7 +1903,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                                scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       double* a, std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
@@ -1907,7 +1911,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                                scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1915,7 +1919,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                                scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
@@ -1924,7 +1928,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size,
                                scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       float** a, std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
@@ -1932,7 +1936,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       double** a, std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
@@ -1940,7 +1944,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<float>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1948,7 +1952,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<double>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<double>* scratchpad,
@@ -1957,7 +1961,7 @@ static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count,
                                group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::int64_t nrhs, float* a, std::int64_t lda,
                                       std::int64_t stride_a, float* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
@@ -1966,7 +1970,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                                stride_b, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::int64_t nrhs, double* a, std::int64_t lda,
                                       std::int64_t stride_a, double* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
@@ -1975,7 +1979,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                                stride_b, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<float>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
@@ -1985,7 +1989,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                                stride_b, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                       std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<double>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
@@ -1995,7 +1999,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
                                stride_b, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
                                       std::int64_t* ldb, std::int64_t group_count,
                                       std::int64_t* group_sizes, float* scratchpad,
@@ -2004,7 +2008,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::int64_t* nrhs, double** a, std::int64_t* lda, double** b,
                                       std::int64_t* ldb, std::int64_t group_count,
                                       std::int64_t* group_sizes, double* scratchpad,
@@ -2013,7 +2017,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::int64_t* nrhs, std::complex<float>** a,
                                       std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
@@ -2022,7 +2026,7 @@ static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* upl
     return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb,
                                group_count, group_sizes, scratchpad, scratchpad_size, dependencies);
 }
-static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::int64_t* nrhs, std::complex<double>** a,
                                       std::int64_t* lda, std::complex<double>** b,
                                       std::int64_t* ldb, std::int64_t group_count,
@@ -2075,7 +2079,7 @@ std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int6
                                    std::int64_t lda) {
     return detail::gebrd_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                    std::int64_t lda) {
     return detail::gerqf_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda);
@@ -2086,15 +2090,15 @@ std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int6
     return detail::geqrf_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {
     return detail::gesvd_scratchpad_size<fp_type>(get_device_id(queue), queue, jobu, jobvt, m, n,
                                                   lda, ldu, ldvt);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {
     return detail::gesvd_scratchpad_size<fp_type>(get_device_id(queue), queue, jobu, jobvt, m, n,
                                                   lda, ldu, ldvt);
@@ -2109,43 +2113,44 @@ std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int6
     return detail::getri_scratchpad_size<fp_type>(get_device_id(queue), queue, n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
+std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                   std::int64_t ldb) {
     return detail::getrs_scratchpad_size<fp_type>(get_device_id(queue), queue, trans, n, nrhs, lda,
                                                   ldb);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
+std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
     return detail::heevd_scratchpad_size<fp_type>(get_device_id(queue), queue, jobz, uplo, n, lda);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldb) {
     return detail::hegvd_scratchpad_size<fp_type>(get_device_id(queue), queue, itype, jobz, uplo, n,
                                                   lda, ldb);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::hetrd_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::hetrf_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::int64_t lda) {
     return detail::orgbr_scratchpad_size<fp_type>(get_device_id(queue), queue, vect, m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::orgtr_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
@@ -2155,78 +2160,78 @@ std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int6
     return detail::orgqr_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     return detail::ormrq_scratchpad_size<fp_type>(get_device_id(queue), queue, side, trans, m, n, k,
                                                   lda, ldc);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     return detail::ormqr_scratchpad_size<fp_type>(get_device_id(queue), queue, side, trans, m, n, k,
                                                   lda, ldc);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldc) {
     return detail::ormtr_scratchpad_size<fp_type>(get_device_id(queue), queue, side, uplo, trans, m,
                                                   n, lda, ldc);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::potrf_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return detail::potrs_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, nrhs, lda,
                                                   ldb);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::potri_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::sytrf_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
+std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
     return detail::syevd_scratchpad_size<fp_type>(get_device_id(queue), queue, jobz, uplo, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldb) {
     return detail::sygvd_scratchpad_size<fp_type>(get_device_id(queue), queue, itype, jobz, uplo, n,
                                                   lda, ldb);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::sytrd_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
                                    std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                    std::int64_t ldb) {
     return detail::trtrs_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, trans, diag, n,
                                                   nrhs, lda, ldb);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::int64_t lda) {
     return detail::ungbr_scratchpad_size<fp_type>(get_device_id(queue), queue, vect, m, n, k, lda);
 }
@@ -2236,51 +2241,51 @@ std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int6
     return detail::ungqr_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
     return detail::ungtr_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     return detail::unmrq_scratchpad_size<fp_type>(get_device_id(queue), queue, side, trans, m, n, k,
                                                   lda, ldc);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     return detail::unmqr_scratchpad_size<fp_type>(get_device_id(queue), queue, side, trans, m, n, k,
                                                   lda, ldc);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldc) {
     return detail::unmtr_scratchpad_size<fp_type>(get_device_id(queue), queue, side, uplo, trans, m,
                                                   n, lda, ldc);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_ipiv, std::int64_t batch_size) {
     return detail::getrf_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda,
                                                         stride_a, stride_ipiv, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_ipiv,
                                          std::int64_t batch_size) {
     return detail::getri_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, n, lda,
                                                         stride_a, stride_ipiv, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_ipiv,
                                          std::int64_t ldb, std::int64_t stride_b,
@@ -2289,30 +2294,30 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transp
                                                         lda, stride_a, stride_ipiv, ldb, stride_b,
                                                         batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
     return detail::geqrf_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda,
                                                         stride_a, stride_tau, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t batch_size) {
     return detail::potrf_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda,
                                                         stride_a, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size) {
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size) {
     return detail::potrs_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, nrhs,
                                                         lda, stride_a, ldb, stride_b, batch_size);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                          std::int64_t k, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
@@ -2320,35 +2325,35 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std
                                                         stride_a, stride_tau, batch_size);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                          std::int64_t k, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
     return detail::ungqr_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, k, lda,
                                                         stride_a, stride_tau, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
     return detail::getrf_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, lda,
                                                         group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
     return detail::getri_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, n, lda,
                                                         group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans,
                                          std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
                                          std::int64_t* ldb, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
     return detail::getrs_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, trans, n, nrhs,
                                                         lda, ldb, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
@@ -2356,22 +2361,22 @@ std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, st
                                                         group_count, group_sizes);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                                          std::int64_t* k, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
     return detail::orgqr_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, m, n, k, lda,
                                                         group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
     return detail::potrf_batch_scratchpad_size<fp_type>(get_device_id(queue), queue, uplo, n, lda,
                                                         group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
                                          std::int64_t* ldb, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
@@ -2379,7 +2384,7 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo*
                                                         lda, ldb, group_count, group_sizes);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                                          std::int64_t* k, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
@@ -2388,5 +2393,5 @@ std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, st
 }
 
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx b/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx
similarity index 60%
rename from include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx
rename to include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx
index f952eb3fd..1dd86d2ed 100644
--- a/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx
+++ b/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx
@@ -24,24 +24,24 @@ static inline void gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std
                          sycl::buffer<std::complex<float>>& taup,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+                                                taup, scratchpad, scratchpad_size);
 }
 static inline void gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+                                                taup, scratchpad, scratchpad_size);
 }
 static inline void gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+                                                taup, scratchpad, scratchpad_size);
 }
 static inline void gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
@@ -50,677 +50,706 @@ static inline void gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std
                          sycl::buffer<std::complex<double>>& taup,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+                                                taup, scratchpad, scratchpad_size);
 }
 static inline void gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                                scratchpad_size);
 }
 static inline void getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
 static inline void getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
 static inline void getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
 static inline void getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
 static inline void getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                                scratchpad_size);
 }
 static inline void getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                                scratchpad_size);
 }
 static inline void getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                                scratchpad_size);
 }
 static inline void getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                                scratchpad_size);
 }
 static inline void getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                               b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+                                                b, ldb, scratchpad, scratchpad_size);
 }
 static inline void getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                               b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+                                                b, ldb, scratchpad, scratchpad_size);
 }
 static inline void getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                               b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+                                                b, ldb, scratchpad, scratchpad_size);
 }
 static inline void getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
                          std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                               b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
+                                                b, ldb, scratchpad, scratchpad_size);
 }
 static inline void gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
                          sycl::buffer<double>& vt, std::int64_t ldvt,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                               u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
 static inline void gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
                          sycl::buffer<float>& vt, std::int64_t ldvt,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                               u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
 static inline void gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
                          std::int64_t ldu, sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                               u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
 static inline void gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
                          std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
                          std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                               u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
+                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size);
 }
-static inline void heevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void heevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                scratchpad, scratchpad_size);
 }
-static inline void heevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
+static inline void heevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                scratchpad, scratchpad_size);
 }
 static inline void hegvd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                               b, ldb, w, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+                                                b, ldb, w, scratchpad, scratchpad_size);
 }
 static inline void hegvd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                               b, ldb, w, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+                                                b, ldb, w, scratchpad, scratchpad_size);
 }
-static inline void hetrd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void hetrd(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<float>& d, sycl::buffer<float>& e,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void hetrd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void hetrd(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void hetrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void hetrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
-static inline void hetrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void hetrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
 static inline void orgbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void orgbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void orgqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void orgqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void orgtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+static inline void orgtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& tau,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void orgtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+static inline void orgtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& tau,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void ormtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                               lda, tau, c, ldc, scratchpad, scratchpad_size);
+static inline void ormtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                         sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                lda, tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+static inline void ormtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                         sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                               lda, tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                lda, tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormrq(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+static inline void ormrq(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormrq(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+static inline void ormrq(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormqr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+static inline void ormqr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void ormqr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+static inline void ormqr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void potrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void potri(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                               scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                               scratchpad, scratchpad_size);
+static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
+                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                                scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                               scratchpad, scratchpad_size);
+static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
+                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                                scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                                scratchpad, scratchpad_size);
 }
-static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+static inline void potrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                                scratchpad, scratchpad_size);
 }
-static inline void syevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+static inline void syevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                          std::int64_t lda, sycl::buffer<double>& w,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                scratchpad, scratchpad_size);
 }
-static inline void syevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+static inline void syevd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                          std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                scratchpad, scratchpad_size);
 }
 static inline void sygvd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
                          std::int64_t ldb, sycl::buffer<double>& w,
                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                               b, ldb, w, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+                                                b, ldb, w, scratchpad, scratchpad_size);
 }
 static inline void sygvd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
                          std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                               b, ldb, w, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
+                                                b, ldb, w, scratchpad, scratchpad_size);
 }
-static inline void sytrd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
+static inline void sytrd(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& d, sycl::buffer<double>& e,
                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void sytrd(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                               scratchpad, scratchpad_size);
-}
-static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+static inline void sytrd(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
+}
+static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
-static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::int64_t>& ipiv,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                               lda, b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
+                                                lda, b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                               lda, b, ldb, scratchpad, scratchpad_size);
+static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
+                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
+                                                lda, b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
+                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                               lda, b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
+                                                lda, b, ldb, scratchpad, scratchpad_size);
 }
-static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                         oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                               lda, b, ldb, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
+                                                lda, b, ldb, scratchpad, scratchpad_size);
 }
 static inline void ungbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void ungbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void ungqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
 static inline void ungqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void ungtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void ungtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void ungtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void ungtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                               scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                scratchpad, scratchpad_size);
 }
-static inline void unmrq(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
+static inline void unmrq(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmrq(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
+static inline void unmrq(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmqr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
+static inline void unmqr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmqr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
+static inline void unmqr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                               tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
+                                                tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+static inline void unmtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<float>>& tau,
                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<float>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                               lda, tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                lda, tau, c, ldc, scratchpad, scratchpad_size);
 }
-static inline void unmtr(backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+static inline void unmtr(backend_selector<backend::LAPACK_BACKEND> selector,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                          sycl::buffer<std::complex<double>>& tau,
                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                          sycl::buffer<std::complex<double>>& scratchpad,
                          std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                               lda, tau, c, ldc, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                lda, tau, c, ldc, scratchpad, scratchpad_size);
 }
 static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<float>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     tau, stride_tau, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      tau, stride_tau, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<double>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     tau, stride_tau, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      tau, stride_tau, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
@@ -729,9 +758,9 @@ static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     tau, stride_tau, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      tau, stride_tau, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
@@ -740,27 +769,27 @@ static inline void geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     tau, stride_tau, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      tau, stride_tau, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
@@ -768,9 +797,9 @@ static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
@@ -778,73 +807,73 @@ static inline void getri_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                     stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                                     batch_size, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                      stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                      batch_size, scratchpad, scratchpad_size);
 }
 static inline void getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                     stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                                     batch_size, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                      stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                      batch_size, scratchpad, scratchpad_size);
 }
 static inline void getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                     stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                                     batch_size, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                      stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                      batch_size, scratchpad, scratchpad_size);
 }
 static inline void getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                     stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                                     batch_size, scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                      stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                      batch_size, scratchpad, scratchpad_size);
 }
 static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                                std::int64_t stride_ipiv, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<float>>& a,
@@ -853,9 +882,9 @@ static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, sycl::buffer<std::complex<double>>& a,
@@ -864,101 +893,101 @@ static inline void getrf_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                     ipiv, stride_ipiv, batch_size, scratchpad,
-                                                     scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
+                                                      ipiv, stride_ipiv, batch_size, scratchpad,
+                                                      scratchpad_size);
 }
 static inline void orgqr_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                     stride_a, tau, stride_tau, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                      stride_a, tau, stride_tau, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void orgqr_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                                std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                     stride_a, tau, stride_tau, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                      stride_a, tau, stride_tau, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(
         selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
 }
 static inline void potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                                std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(
         selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
 }
 static inline void potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
+                               oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(
         selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
 }
 static inline void potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
+                               oneapi::math::uplo uplo, std::int64_t n,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(
         selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
 }
 static inline void potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                     stride_a, b, ldb, stride_b, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                      stride_a, b, ldb, stride_b, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                                sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                     stride_a, b, ldb, stride_b, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                      stride_a, b, ldb, stride_b, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                     stride_a, b, ldb, stride_b, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                      stride_a, b, ldb, stride_b, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
                                std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                     stride_a, b, ldb, stride_b, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                      stride_a, b, ldb, stride_b, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void ungqr_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
@@ -967,9 +996,9 @@ static inline void ungqr_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t batch_size,
                                sycl::buffer<std::complex<float>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                     stride_a, tau, stride_tau, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                      stride_a, tau, stride_tau, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline void ungqr_batch(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                std::int64_t n, std::int64_t k,
@@ -978,36 +1007,36 @@ static inline void ungqr_batch(backend_selector<backend::LAPACK_BACKEND> selecto
                                std::int64_t stride_tau, std::int64_t batch_size,
                                sycl::buffer<std::complex<double>>& scratchpad,
                                std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                     stride_a, tau, stride_tau, batch_size,
-                                                     scratchpad, scratchpad_size);
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                      stride_a, tau, stride_tau, batch_size,
+                                                      scratchpad, scratchpad_size);
 }
 static inline sycl::event gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d,
                                 float* e, std::complex<float>* tauq, std::complex<float>* taup,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
-                                                      tauq, taup, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
+                                                       tauq, taup, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* d, double* e,
                                 double* tauq, double* taup, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
-                                                      tauq, taup, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
+                                                       tauq, taup, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* d, float* e,
                                 float* tauq, float* taup, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
-                                                      tauq, taup, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
+                                                       tauq, taup, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event gebrd(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
@@ -1015,623 +1044,624 @@ static inline sycl::event gebrd(backend_selector<backend::LAPACK_BACKEND> select
                                 std::complex<double>* taup, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
-                                                      tauq, taup, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e,
+                                                       tauq, taup, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event gerqf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrf(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::int64_t* ipiv, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getri(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* b, std::int64_t ldb,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                       ipiv, b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                       ipiv, b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                       ipiv, b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event getrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* b, std::int64_t ldb,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                       ipiv, b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
-                                std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                double* s, double* u, std::int64_t ldu, double* vt,
+                                std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
-                                                      lda, s, u, ldu, vt, ldvt, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
+                                                       lda, s, u, ldu, vt, ldvt, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
-                                std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
-                                                      lda, s, u, ldu, vt, ldvt, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
+                                                       lda, s, u, ldu, vt, ldvt, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, float* s, std::complex<float>* u,
+                                std::int64_t ldu, std::complex<float>* vt, std::int64_t ldvt,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
-                                                      lda, s, u, ldu, vt, ldvt, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
+                                                       lda, s, u, ldu, vt, ldvt, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event gesvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* s, std::complex<double>* u, std::int64_t ldu,
-                                std::complex<double>* vt, std::int64_t ldvt,
+                                oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, double* s, std::complex<double>* u,
+                                std::int64_t ldu, std::complex<double>* vt, std::int64_t ldvt,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
-                                                      lda, s, u, ldu, vt, ldvt, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a,
+                                                       lda, s, u, ldu, vt, ldvt, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event heevd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, float* w,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda,
-                                                      w, scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::heevd(
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event heevd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda, double* w,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda,
-                                                      w, scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::heevd(
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event hegvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* b, std::int64_t ldb, float* w,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a,
-                                                      lda, b, ldb, w, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n,
+                                                       a, lda, b, ldb, w, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event hegvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* b, std::int64_t ldb, double* w,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a,
-                                                      lda, b, ldb, w, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n,
+                                                       a, lda, b, ldb, w, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event hetrd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, float* d, float* e, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e,
-                                                      tau, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e,
+                                                       tau, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event hetrd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, double* d, double* e, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e,
-                                                      tau, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e,
+                                                       tau, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event hetrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event hetrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, float* a, std::int64_t lda, float* tau,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgbr(
         selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgbr(
         selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event orgtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* tau, double* scratchpad,
+                                std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ormtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n,
-                                                      a, lda, tau, c, ldc, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m,
+                                                       n, a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ormtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 double* a, std::int64_t lda, double* tau, double* c,
                                 std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n,
-                                                      a, lda, tau, c, ldc, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m,
+                                                       n, a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ormrq(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ormrq(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ormqr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, double* a,
                                 std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ormqr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k, float* a,
                                 std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event potrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potri(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potri(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potri(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potri(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                       b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event potrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                       b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event potrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                                 std::int64_t ldb, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                       b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event potrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                                 std::int64_t ldb, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda,
+                                                       b, ldb, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event syevd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 double* a, std::int64_t lda, double* w, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda,
-                                                      w, scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::syevd(
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event syevd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                 float* a, std::int64_t lda, float* w, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda,
-                                                      w, scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::syevd(
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event sygvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, double* a, std::int64_t lda, double* b,
                                 std::int64_t ldb, double* w, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a,
-                                                      lda, b, ldb, w, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n,
+                                                       a, lda, b, ldb, w, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event sygvd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
                                 std::int64_t n, float* a, std::int64_t lda, float* b,
                                 std::int64_t ldb, float* w, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a,
-                                                      lda, b, ldb, w, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n,
+                                                       a, lda, b, ldb, w, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event sytrd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* d, double* e, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* d, double* e, double* tau,
+                                double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e,
-                                                      tau, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e,
+                                                       tau, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event sytrd(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 float* d, float* e, float* tau, float* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e,
-                                                      tau, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e,
+                                                       tau, scratchpad, scratchpad_size,
+                                                       dependencies);
 }
 static inline sycl::event sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
                                 std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event sytrf(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::int64_t* ipiv,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                                 std::int64_t ldb, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
-                                                      nrhs, a, lda, b, ldb, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
+                                                       nrhs, a, lda, b, ldb, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 double* a, std::int64_t lda, double* b, std::int64_t ldb,
                                 double* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
-                                                      nrhs, a, lda, b, ldb, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
+                                                       nrhs, a, lda, b, ldb, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
-                                                      nrhs, a, lda, b, ldb, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
+                                                       nrhs, a, lda, b, ldb, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event trtrs(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                 std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                                 std::int64_t ldb, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
-                                                      nrhs, a, lda, b, ldb, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n,
+                                                       nrhs, a, lda, b, ldb, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event ungbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                 std::complex<float>* tau, std::complex<float>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungbr(
         selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ungbr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungbr(
         selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ungqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
@@ -1639,101 +1669,101 @@ static inline sycl::event ungqr(backend_selector<backend::LAPACK_BACKEND> select
                                 std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ungqr(backend_selector<backend::LAPACK_BACKEND> selector, std::int64_t m,
                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ungtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                 std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event ungtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                 std::int64_t lda, std::complex<double>* tau,
                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                      scratchpad, scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                       scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event unmrq(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event unmrq(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event unmqr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event unmqr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                oneapi::math::side side, oneapi::math::transpose trans,
                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a,
-                                                      lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                      dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k,
+                                                       a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event unmtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                                 std::complex<float>* c, std::int64_t ldc,
                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n,
-                                                      a, lda, tau, c, ldc, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m,
+                                                       n, a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event unmtr(backend_selector<backend::LAPACK_BACKEND> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                 std::complex<double>* a, std::int64_t lda,
                                 std::complex<double>* tau, std::complex<double>* c,
                                 std::int64_t ldc, std::complex<double>* scratchpad,
                                 std::int64_t scratchpad_size,
                                 const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n,
-                                                      a, lda, tau, c, ldc, scratchpad,
-                                                      scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m,
+                                                       n, a, lda, tau, c, ldc, scratchpad,
+                                                       scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
@@ -1741,7 +1771,7 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1751,7 +1781,7 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1762,7 +1792,7 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1773,7 +1803,7 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1783,9 +1813,9 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
+        selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* m, std::int64_t* n, double** a,
@@ -1793,9 +1823,9 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
+        selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* m, std::int64_t* n, std::complex<float>** a,
@@ -1803,9 +1833,9 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
+        selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies);
 }
 static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* m, std::int64_t* n, std::complex<double>** a,
@@ -1814,9 +1844,9 @@ static inline sycl::event geqrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(
+        selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies);
 }
 static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
@@ -1824,7 +1854,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1834,7 +1864,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1844,7 +1874,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1855,7 +1885,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1865,7 +1895,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1875,7 +1905,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1885,7 +1915,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1896,7 +1926,7 @@ static inline sycl::event getrf_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(
         selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1906,7 +1936,7 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(
         selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1916,7 +1946,7 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(
         selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1926,7 +1956,7 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_ipiv, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(
         selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1937,7 +1967,7 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(
         selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -1947,9 +1977,9 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* n, double** a, std::int64_t* lda,
@@ -1957,9 +1987,9 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
@@ -1967,9 +1997,9 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND> selector,
                                       std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
@@ -1977,36 +2007,36 @@ static inline sycl::event getri_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, float* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, double* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
                                       std::int64_t stride_b, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::complex<float>* b,
@@ -2014,12 +2044,12 @@ static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
+                                      oneapi::math::transpose trans, std::int64_t n,
                                       std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t* ipiv,
                                       std::int64_t stride_ipiv, std::complex<double>* b,
@@ -2027,51 +2057,51 @@ static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t batch_size, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
         batch_size, scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, float** a, std::int64_t* lda,
                                       std::int64_t** ipiv, float** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, double** a, std::int64_t* lda,
                                       std::int64_t** ipiv, double** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
                                       std::int64_t* nrhs, std::complex<float>** a,
                                       std::int64_t* lda, std::int64_t** ipiv,
                                       std::complex<float>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event getrs_batch(
-    backend_selector<backend::LAPACK_BACKEND> selector, oneapi::mkl::transpose* trans,
+    backend_selector<backend::LAPACK_BACKEND> selector, oneapi::math::transpose* trans,
     std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda,
     std::int64_t** ipiv, std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
     std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
     const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(
         selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes,
         scratchpad, scratchpad_size, dependencies);
 }
@@ -2081,7 +2111,7 @@ static inline sycl::event orgqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_tau, std::int64_t batch_size,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(
         selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2091,7 +2121,7 @@ static inline sycl::event orgqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_tau, std::int64_t batch_size,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(
         selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2101,7 +2131,7 @@ static inline sycl::event orgqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(
         selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2111,178 +2141,178 @@ static inline sycl::event orgqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(
         selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+                                      oneapi::math::uplo uplo, std::int64_t n, float* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            stride_a, batch_size, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             stride_a, batch_size, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+                                      oneapi::math::uplo uplo, std::int64_t n, double* a,
                                       std::int64_t lda, std::int64_t stride_a,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            stride_a, batch_size, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             stride_a, batch_size, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
+                                      oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            stride_a, batch_size, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             stride_a, batch_size, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
+                                      oneapi::math::uplo uplo, std::int64_t n,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::int64_t batch_size,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            stride_a, batch_size, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             stride_a, batch_size, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, float** a,
                                       std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, double** a,
                                       std::int64_t* lda, std::int64_t group_count,
                                       std::int64_t* group_sizes, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<float>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrf_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
                                       std::complex<double>** a, std::int64_t* lda,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                            group_count, group_sizes, scratchpad,
-                                                            scratchpad_size, dependencies);
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda,
+                                                             group_count, group_sizes, scratchpad,
+                                                             scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       float* a, std::int64_t lda, std::int64_t stride_a, float* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, float* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       double* a, std::int64_t lda, std::int64_t stride_a, double* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, double* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       std::complex<float>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<float>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                       std::complex<double>* a, std::int64_t lda,
                                       std::int64_t stride_a, std::complex<double>* b,
                                       std::int64_t ldb, std::int64_t stride_b,
                                       std::int64_t batch_size, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       float* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       double* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       std::complex<float>** a, std::int64_t* lda,
                                       std::complex<float>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
 static inline sycl::event potrs_batch(backend_selector<backend::LAPACK_BACKEND> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
                                       std::complex<double>** a, std::int64_t* lda,
                                       std::complex<double>** b, std::int64_t* ldb,
                                       std::int64_t group_count, std::int64_t* group_sizes,
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(
         selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2293,7 +2323,7 @@ static inline sycl::event ungqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t stride_tau, std::int64_t batch_size,
                                       std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(
         selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2305,7 +2335,7 @@ static inline sycl::event ungqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(
         selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2316,7 +2346,7 @@ static inline sycl::event ungqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(
         selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2327,7 +2357,7 @@ static inline sycl::event ungqr_batch(backend_selector<backend::LAPACK_BACKEND>
                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
                                       std::int64_t scratchpad_size,
                                       const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(
         selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
@@ -2335,360 +2365,360 @@ static inline sycl::event ungqr_batch(backend_selector<backend::LAPACK_BACKEND>
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t gebrd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t gerqf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t geqrf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t gesvd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                    std::int64_t m, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldu, std::int64_t ldvt) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<fp_type>(
         selector.get_queue(), jobu, jobvt, m, n, lda, ldu, ldvt);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t gesvd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                    std::int64_t m, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldu, std::int64_t ldvt) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<fp_type>(
         selector.get_queue(), jobu, jobvt, m, n, lda, ldu, ldvt);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getri_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size<fp_type>(
+        selector.get_queue(), n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrs_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                   oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_scratchpad_size<fp_type>(
         selector.get_queue(), trans, n, nrhs, lda, ldb);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t heevd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::heevd_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               jobz, uplo, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::heevd_scratchpad_size<fp_type>(
+        selector.get_queue(), jobz, uplo, n, lda);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t hegvd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldb) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<fp_type>(
         selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t hetrd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t hetrf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgbr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               vect, m, n, k, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<fp_type>(
+        selector.get_queue(), vect, m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgtr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgqr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t k,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, k, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t ormrq_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t ormqr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t ormtr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<fp_type>(
         selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrs_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, nrhs, lda, ldb);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potri_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potri_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t sytrf_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t syevd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::syevd_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               jobz, uplo, n, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::syevd_scratchpad_size<fp_type>(
+        selector.get_queue(), jobz, uplo, n, lda);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t sygvd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                    std::int64_t ldb) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<fp_type>(
         selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
 }
 template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t sytrd_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
 std::int64_t trtrs_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                                    std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, trans, diag, n, nrhs, lda, ldb);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungbr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               vect, m, n, k, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<fp_type>(
+        selector.get_queue(), vect, m, n, k, lda);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungqr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                    std::int64_t m, std::int64_t n, std::int64_t k,
                                    std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               m, n, k, lda);
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungtr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                               uplo, n, lda);
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    return oneapi::math::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t unmrq_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t unmqr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                    std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<fp_type>(
         selector.get_queue(), side, trans, m, n, k, lda, ldc);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t unmtr_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<fp_type>(
         selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_ipiv,
                                          std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, stride_a, stride_ipiv, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getri_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t n, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_ipiv, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<fp_type>(
         selector.get_queue(), n, lda, stride_a, stride_ipiv, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::transpose trans, std::int64_t n,
+                                         oneapi::math::transpose trans, std::int64_t n,
                                          std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_ipiv, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b,
         batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t stride_tau,
                                          std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, stride_a, stride_tau, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, lda, stride_a, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                                          std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t k,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t m, std::int64_t n, std::int64_t k,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getri_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<fp_type>(
         selector.get_queue(), n, lda, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::transpose* trans, std::int64_t* n,
+                                         oneapi::math::transpose* trans, std::int64_t* n,
                                          std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, lda, group_count, group_sizes);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
 std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, lda, group_count, group_sizes);
 }
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
 std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
                                          std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<fp_type>(
         selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
 std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::LAPACK_BACKEND> selector,
                                          std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<fp_type>(
+    return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<fp_type>(
         selector.get_queue(), m, n, k, lda, group_count, group_sizes);
 }
diff --git a/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx b/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx
new file mode 100644
index 000000000..15f59a7c7
--- /dev/null
+++ b/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx
@@ -0,0 +1,2169 @@
+/*******************************************************************************
+* Copyright 2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tauq,
+                          sycl::buffer<std::complex<float>>& taup,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tauq,
+                          sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tauq,
+                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tauq,
+                          sycl::buffer<std::complex<double>>& taup,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
+                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
+                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b,
+                          std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
+                          sycl::buffer<double>& vt, std::int64_t ldvt,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
+                          sycl::buffer<float>& vt, std::int64_t ldvt,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<float>& s,
+                          sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<double>& s,
+                          sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);  // buffer API, float
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);  // buffer API, double
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<float>
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);  // buffer API, std::complex<double>
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tauq, std::complex<float>* taup,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<float> (d, e are float)
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* d, double* e, double* tauq, double* taup,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, double
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* d, float* e, float* tauq, float* taup,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, float
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tauq, std::complex<double>* taup,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<double> (d, e are double)
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, float
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, double
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<float>
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<double>
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<float>
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, double
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, float
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});  // pointer API, std::complex<double>
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
+                                 std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
+                                 std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 double* a, std::int64_t lda, double* s, double* u,
+                                 std::int64_t ldu, double* vt, std::int64_t ldvt,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu,
+                                 float* vt, std::int64_t ldvt, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* s,
+                                 std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
+                                 std::int64_t ldvt, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* s,
+                                 std::complex<double>* u, std::int64_t ldu,
+                                 std::complex<double>* vt, std::int64_t ldvt,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
+                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                 float* tau, float* c, std::int64_t ldc, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                 double* tau, double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, float* b,
+                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, double* b,
+                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* w, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* w, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* b, std::int64_t ldb, float* w,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* d, double* e, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* d, float* e, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
+                                 std::int64_t ldc, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* c, std::int64_t ldc,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, float* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t* ipiv, std::int64_t stride_ipiv,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, float** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, double** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::complex<float>** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::complex<double>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       float* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, float** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, double** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, float** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, double** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::complex<float>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event potrs_batch(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::complex<double>** a, std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
+std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
+                                   std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                   std::int64_t ldb);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldc);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_ipiv, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_ipiv,
+                                         std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
+                                         std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_ipiv,
+                                         std::int64_t ldb, std::int64_t stride_b,
+                                         std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans,
+                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
+                                         std::int64_t* ldb, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_real_floating_point<fp_type> = nullptr>
+std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                         std::int64_t* k, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                         std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <typename fp_type, oneapi::math::lapack::internal::is_floating_point<fp_type> = nullptr>
+std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
+                                         std::int64_t* ldb, std::int64_t group_count,
+                                         std::int64_t* group_sizes);
+template <typename fp_type,
+          oneapi::math::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
+std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                         std::int64_t* k, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::jobsvd jobu,
+                                                         oneapi::math::jobsvd jobvt, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda,
+                                                         std::int64_t ldu, std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+    std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+    std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
+    std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       std::int64_t m,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        std::int64_t m,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<float>(sycl::queue& queue, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<double>(sycl::queue& queue, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::transpose trans,
+                                                         std::int64_t n, std::int64_t nrhs,
+                                                         std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::transpose trans,
+                                                          std::int64_t n, std::int64_t nrhs,
+                                                          std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::job jobz,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::job jobz,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
+    std::int64_t n, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
+    std::int64_t n, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::generate vect,
+                                                         std::int64_t m, std::int64_t n,
+                                                         std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::generate vect,
+                                                          std::int64_t m, std::int64_t n,
+                                                          std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                         std::int64_t n, std::int64_t k,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                          std::int64_t n, std::int64_t k,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::side side,
+                                                         oneapi::math::uplo uplo,
+                                                         oneapi::math::transpose trans,
+                                                         std::int64_t m, std::int64_t n,
+                                                         std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::side side,
+                                                          oneapi::math::uplo uplo,
+                                                          oneapi::math::transpose trans,
+                                                          std::int64_t m, std::int64_t n,
+                                                          std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t nrhs, std::int64_t lda,
+                                                         std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t nrhs, std::int64_t lda,
+                                                          std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size<float>(sycl::queue& queue, oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::job jobz,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size<float>(sycl::queue& queue, std::int64_t itype,
+                                                         oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size<double>(sycl::queue& queue, std::int64_t itype,
+                                                          oneapi::math::job jobz,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size<float>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size<double>(sycl::queue& queue,
+                                                          oneapi::math::uplo uplo, std::int64_t n,
+                                                          std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+    oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+    oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+    oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+    oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
+template <>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k,
+    std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<float>>(sycl::queue& queue,
+                                                                       oneapi::math::uplo uplo,
+                                                                       std::int64_t n,
+                                                                       std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                                        oneapi::math::uplo uplo,
+                                                                        std::int64_t n,
+                                                                        std::int64_t lda);
+template <>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
+    std::int64_t ldc);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                               std::int64_t n, std::int64_t lda,
+                                                               std::int64_t stride_a,
+                                                               std::int64_t stride_ipiv,
+                                                               std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                                std::int64_t n, std::int64_t lda,
+                                                                std::int64_t stride_a,
+                                                                std::int64_t stride_ipiv,
+                                                                std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t n,
+                                                               std::int64_t lda,
+                                                               std::int64_t stride_a,
+                                                               std::int64_t stride_ipiv,
+                                                               std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t n,
+                                                                std::int64_t lda,
+                                                                std::int64_t stride_a,
+                                                                std::int64_t stride_ipiv,
+                                                                std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_ipiv, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
+                                                               std::int64_t n, std::int64_t lda,
+                                                               std::int64_t stride_a,
+                                                               std::int64_t stride_tau,
+                                                               std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
+                                                                std::int64_t n, std::int64_t lda,
+                                                                std::int64_t stride_a,
+                                                                std::int64_t stride_tau,
+                                                                std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue,
+                                                               oneapi::math::uplo uplo,
+                                                               std::int64_t n, std::int64_t lda,
+                                                               std::int64_t stride_a,
+                                                               std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue,
+                                                                oneapi::math::uplo uplo,
+                                                                std::int64_t n, std::int64_t lda,
+                                                                std::int64_t stride_a,
+                                                                std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
+                                                               std::int64_t* n, std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
+                                                                std::int64_t* n, std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* n,
+                                                               std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* n,
+                                                                std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count,
+    std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
+                                                               std::int64_t* n, std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
+                                                                std::int64_t* n, std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
+                                                               std::int64_t* n, std::int64_t* k,
+                                                               std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
+                                                                std::int64_t* n, std::int64_t* k,
+                                                                std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue,
+                                                               oneapi::math::uplo* uplo,
+                                                               std::int64_t* n, std::int64_t* lda,
+                                                               std::int64_t group_count,
+                                                               std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue,
+                                                                oneapi::math::uplo* uplo,
+                                                                std::int64_t* n, std::int64_t* lda,
+                                                                std::int64_t group_count,
+                                                                std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
+template <>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda,
+    std::int64_t group_count, std::int64_t* group_sizes);
diff --git a/include/oneapi/mkl/lapack/detail/mklcpu/lapack_ct.hpp b/include/oneapi/math/lapack/detail/mklcpu/lapack_ct.hpp
similarity index 78%
rename from include/oneapi/mkl/lapack/detail/mklcpu/lapack_ct.hpp
rename to include/oneapi/math/lapack/detail/mklcpu/lapack_ct.hpp
index 1a6c088d6..21aa41248 100644
--- a/include/oneapi/mkl/lapack/detail/mklcpu/lapack_ct.hpp
+++ b/include/oneapi/math/lapack/detail/mklcpu/lapack_ct.hpp
@@ -28,19 +28,19 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 #define LAPACK_BACKEND mklcpu
-#include "oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx"
+#include "oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx"
 #undef LAPACK_BACKEND
 
 } //namespace lapack
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp b/include/oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp
similarity index 82%
rename from include/oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp
rename to include/oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp
index fc52ce1db..3341b9e30 100644
--- a/include/oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp
+++ b/include/oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp
@@ -28,18 +28,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace mklcpu {
 
-#include "oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx"
+#include "oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx"
 
 } //namespace mklcpu
 } //namespace lapack
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/mklgpu/lapack_ct.hpp b/include/oneapi/math/lapack/detail/mklgpu/lapack_ct.hpp
similarity index 78%
rename from include/oneapi/mkl/lapack/detail/mklgpu/lapack_ct.hpp
rename to include/oneapi/math/lapack/detail/mklgpu/lapack_ct.hpp
index e344966a0..ece801387 100644
--- a/include/oneapi/mkl/lapack/detail/mklgpu/lapack_ct.hpp
+++ b/include/oneapi/math/lapack/detail/mklgpu/lapack_ct.hpp
@@ -27,19 +27,19 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 #define LAPACK_BACKEND mklgpu
-#include "oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx"
+#include "oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx"
 #undef LAPACK_BACKEND
 
 } //namespace lapack
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp b/include/oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp
similarity index 82%
rename from include/oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp
rename to include/oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp
index 132431b7c..4eb216ad2 100644
--- a/include/oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp
+++ b/include/oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp
@@ -28,18 +28,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace mklgpu {
 
-#include "oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx"
+#include "oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx"
 
 } //namespace mklgpu
 } //namespace lapack
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hpp b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hpp
similarity index 84%
rename from include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hpp
rename to include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hpp
index 5e98b7c47..126d62ca6 100644
--- a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hpp
+++ b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hpp
@@ -30,13 +30,13 @@
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
-#include "oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
+#include "oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 #define LAPACK_BACKEND rocsolver
@@ -44,7 +44,7 @@ namespace lapack {
 #undef LAPACK_BACKEND
 
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif //_DETAIL_ROCSOLVER_LAPACK_CT_HPP_
diff --git a/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx
new file mode 100644
index 000000000..f9db456a9
--- /dev/null
+++ b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx
@@ -0,0 +1,2658 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Copyright 2022 Intel Corporation
+*
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+// Buffer APIs
+
+static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<float>& d, sycl::buffer<float>& e,
+                         sycl::buffer<std::complex<float>>& tauq,
+                         sycl::buffer<std::complex<float>>& taup,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+                                           scratchpad, scratchpad_size);
+}
+static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& d, sycl::buffer<double>& e,
+                         sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+                                           scratchpad, scratchpad_size);
+}
+static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
+                         sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+                                           scratchpad, scratchpad_size);
+}
+static inline void gebrd(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<double>& d, sycl::buffer<double>& e,
+                         sycl::buffer<std::complex<double>>& tauq,
+                         sycl::buffer<std::complex<double>>& taup,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
+                                           scratchpad, scratchpad_size);
+}
+static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void gerqf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void geqrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getrf(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                         sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getri(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void getrs(backend_selector<backend::rocsolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
+                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+                                           ldb, scratchpad, scratchpad_size);
+}
+static inline void getrs(backend_selector<backend::rocsolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
+                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+                                           ldb, scratchpad, scratchpad_size);
+}
+static inline void getrs(backend_selector<backend::rocsolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+                                           ldb, scratchpad, scratchpad_size);
+}
+static inline void getrs(backend_selector<backend::rocsolver> selector,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
+                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b,
+                                           ldb, scratchpad, scratchpad_size);
+}
+static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
+                         sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt,
+                         std::int64_t ldvt, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
+}
+static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
+                         sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt,
+                         std::int64_t ldvt, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
+}
+static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
+                         std::int64_t ldu, sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
+}
+static inline void gesvd(backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+                         oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
+                         std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
+                         std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u,
+                                           ldu, vt, ldvt, scratchpad, scratchpad_size);
+}
+static inline void heevd(backend_selector<backend::rocsolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                           scratchpad, scratchpad_size);
+}
+static inline void heevd(backend_selector<backend::rocsolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                           scratchpad, scratchpad_size);
+}
+static inline void hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+                                           ldb, w, scratchpad, scratchpad_size);
+}
+static inline void hegvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+                                           ldb, w, scratchpad, scratchpad_size);
+}
+static inline void hetrd(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<float>& d, sycl::buffer<float>& e,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                           scratchpad, scratchpad_size);
+}
+static inline void hetrd(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<double>& d, sycl::buffer<double>& e,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                           scratchpad, scratchpad_size);
+}
+static inline void hetrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void hetrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+}
+static inline void orgbr(backend_selector<backend::rocsolver> selector, oneapi::math::generate vec,
+                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& tau,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                           scratchpad, scratchpad_size);
+}
+static inline void orgbr(backend_selector<backend::rocsolver> selector, oneapi::math::generate vec,
+                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& tau,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                           scratchpad, scratchpad_size);
+}
+static inline void orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void orgtr(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void orgtr(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+}
+static inline void ormtr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+                                           tau, c, ldc, scratchpad, scratchpad_size);
+}
+static inline void ormtr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+                                           tau, c, ldc, scratchpad, scratchpad_size);
+}
+static inline void ormrq(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+}
+static inline void ormrq(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+}
+static inline void ormqr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+}
+static inline void ormqr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+}
+static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+}
+static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+} // complex<float> variant: calls rocsolver::potri(selector.get_queue(), <same args>)
+static inline void potri(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                           scratchpad_size);
+} // complex<double> variant: calls rocsolver::potri(selector.get_queue(), <same args>)
+static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                           scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::potrs(selector.get_queue(), <same args>)
+static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                           scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::potrs(selector.get_queue(), <same args>)
+static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                           scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::potrs(selector.get_queue(), <same args>)
+static inline void potrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
+                                           scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::potrs(selector.get_queue(), <same args>)
+static inline void syevd(backend_selector<backend::rocsolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                         std::int64_t lda, sycl::buffer<double>& w,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                           scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::syevd(selector.get_queue(), <same args>)
+static inline void syevd(backend_selector<backend::rocsolver> selector, oneapi::math::job jobz,
+                         oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                         std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                           scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::syevd(selector.get_queue(), <same args>)
+static inline void sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
+                         std::int64_t ldb, sycl::buffer<double>& w,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+                                           ldb, w, scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::sygvd(selector.get_queue(), <same args>)
+static inline void sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
+                         std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b,
+                                           ldb, w, scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::sygvd(selector.get_queue(), <same args>)
+static inline void sytrd(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& d, sycl::buffer<double>& e,
+                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                           scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::sytrd(selector.get_queue(), <same args>)
+static inline void sytrd(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
+                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                           scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::sytrd(selector.get_queue(), <same args>)
+static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+} // float variant: calls rocsolver::sytrf(selector.get_queue(), <same args>)
+static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+} // double variant: calls rocsolver::sytrf(selector.get_queue(), <same args>)
+static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+} // complex<float> variant: calls rocsolver::sytrf(selector.get_queue(), <same args>)
+static inline void sytrf(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::int64_t>& ipiv,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
+                                           scratchpad_size);
+} // complex<double> variant: calls rocsolver::sytrf(selector.get_queue(), <same args>)
+static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+                                           b, ldb, scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::trtrs(selector.get_queue(), <same args>)
+static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                         sycl::buffer<double>& b, std::int64_t ldb,
+                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+                                           b, ldb, scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::trtrs(selector.get_queue(), <same args>)
+static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+                                           b, ldb, scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::trtrs(selector.get_queue(), <same args>)
+static inline void trtrs(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
+                                           b, ldb, scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::trtrs(selector.get_queue(), <same args>)
+static inline void ungbr(backend_selector<backend::rocsolver> selector, oneapi::math::generate vec,
+                         std::int64_t m, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                           scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::ungbr(selector.get_queue(), <same args>)
+static inline void ungbr(backend_selector<backend::rocsolver> selector, oneapi::math::generate vec,
+                         std::int64_t m, std::int64_t n, std::int64_t k,
+                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                           scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::ungbr(selector.get_queue(), <same args>)
+static inline void ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+} // complex<float> variant: calls rocsolver::ungqr(selector.get_queue(), <same args>)
+static inline void ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+} // complex<double> variant: calls rocsolver::ungqr(selector.get_queue(), <same args>)
+static inline void ungtr(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+} // complex<float> variant: calls rocsolver::ungtr(selector.get_queue(), <same args>)
+static inline void ungtr(backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
+                                           scratchpad_size);
+} // complex<double> variant: calls rocsolver::ungtr(selector.get_queue(), <same args>)
+static inline void unmrq(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::unmrq(selector.get_queue(), <same args>)
+static inline void unmrq(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::unmrq(selector.get_queue(), <same args>)
+static inline void unmqr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::unmqr(selector.get_queue(), <same args>)
+static inline void unmqr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau,
+                                           c, ldc, scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::unmqr(selector.get_queue(), <same args>)
+static inline void unmtr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<float>>& tau,
+                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<float>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+                                           tau, c, ldc, scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::unmtr(selector.get_queue(), <same args>)
+static inline void unmtr(backend_selector<backend::rocsolver> selector, oneapi::math::side side,
+                         oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
+                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                         sycl::buffer<std::complex<double>>& tau,
+                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                         sycl::buffer<std::complex<double>>& scratchpad,
+                         std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda,
+                                           tau, c, ldc, scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::unmtr(selector.get_queue(), <same args>)
+static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<float>& tau,
+                               std::int64_t stride_tau, std::int64_t batch_size,
+                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+                                                 stride_tau, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // float variant: calls rocsolver::geqrf_batch(selector.get_queue(), <same args>)
+static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<double>& tau,
+                               std::int64_t stride_tau, std::int64_t batch_size,
+                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+                                                 stride_tau, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // double variant: calls rocsolver::geqrf_batch(selector.get_queue(), <same args>)
+static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
+                               std::int64_t batch_size,
+                               sycl::buffer<std::complex<float>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+                                                 stride_tau, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<float> variant: calls rocsolver::geqrf_batch(selector.get_queue(), <same args>)
+static inline void geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
+                               std::int64_t batch_size,
+                               sycl::buffer<std::complex<double>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
+                                                 stride_tau, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<double> variant: calls rocsolver::geqrf_batch(selector.get_queue(), <same args>)
+static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // float variant: calls rocsolver::getri_batch(selector.get_queue(), <same args>)
+static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // double variant: calls rocsolver::getri_batch(selector.get_queue(), <same args>)
+static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, std::int64_t batch_size,
+                               sycl::buffer<std::complex<float>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<float> variant: calls rocsolver::getri_batch(selector.get_queue(), <same args>)
+static inline void getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, std::int64_t batch_size,
+                               sycl::buffer<std::complex<double>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<double> variant: calls rocsolver::getri_batch(selector.get_queue(), <same args>)
+static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
+                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                 batch_size, scratchpad, scratchpad_size);
+} // float variant: calls rocsolver::getrs_batch(selector.get_queue(), <same args>)
+static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
+                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                 batch_size, scratchpad, scratchpad_size);
+} // double variant: calls rocsolver::getrs_batch(selector.get_queue(), <same args>)
+static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                               sycl::buffer<std::complex<float>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                 batch_size, scratchpad, scratchpad_size);
+} // complex<float> variant: calls rocsolver::getrs_batch(selector.get_queue(), <same args>)
+static inline void getrs_batch(backend_selector<backend::rocsolver> selector,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
+                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+                               sycl::buffer<std::complex<double>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
+                                                 stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+                                                 batch_size, scratchpad, scratchpad_size);
+} // complex<double> variant: calls rocsolver::getrs_batch(selector.get_queue(), <same args>)
+static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, std::int64_t batch_size,
+                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // float variant: calls rocsolver::getrf_batch(selector.get_queue(), <same args>)
+static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                               std::int64_t stride_ipiv, std::int64_t batch_size,
+                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // double variant: calls rocsolver::getrf_batch(selector.get_queue(), <same args>)
+static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               std::int64_t batch_size,
+                               sycl::buffer<std::complex<float>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<float> variant: calls rocsolver::getrf_batch(selector.get_queue(), <same args>)
+static inline void getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                               std::int64_t lda, std::int64_t stride_a,
+                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                               std::int64_t batch_size,
+                               sycl::buffer<std::complex<double>>& scratchpad,
+                               std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
+                                                 stride_ipiv, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // complex<double> variant: calls rocsolver::getrf_batch(selector.get_queue(), <same args>)
+static inline void orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                               std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
+                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
+                               std::int64_t stride_tau, std::int64_t batch_size,
+                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a,
+                                                 tau, stride_tau, batch_size, scratchpad,
+                                                 scratchpad_size);
+} // float variant: calls rocsolver::orgqr_batch(selector.get_queue(), <same args>)
+static inline void orgqr_batch(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, std::int64_t k,
+    sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
+    std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+    std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    oneapi::math::lapack::rocsolver::orgqr_batch(
+        queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
+}
+static inline void potrf_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
+    sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf_batch(  // pass-through on the selector's queue
+        selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+}
+static inline void potrf_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
+    sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf_batch(  // pass-through on the selector's queue
+        selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+}
+// potrf_batch (sycl::buffer, std::complex<float>): forwards to the rocSOLVER backend.
+static inline void potrf_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
+    std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf_batch(
+        selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+}
+// potrf_batch (sycl::buffer, std::complex<double>): forwards to the rocSOLVER backend.
+static inline void potrf_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
+    std::int64_t scratchpad_size) {
+    oneapi::math::lapack::rocsolver::potrf_batch(
+        selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+}
+// potrs_batch (sycl::buffer, float): forwards to the rocSOLVER backend.
+static inline void potrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+    sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+    sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
+                                                 stride_b, batch_size, scratchpad, scratchpad_size);
+}
+// potrs_batch (sycl::buffer, double): forwards to the rocSOLVER backend.
+static inline void potrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+    sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
+    sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
+                                                 stride_b, batch_size, scratchpad, scratchpad_size);
+}
+// potrs_batch (sycl::buffer, std::complex<float>): forwards to the rocSOLVER backend.
+static inline void potrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+    std::int64_t stride_a, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
+    std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
+                                                 stride_b, batch_size, scratchpad, scratchpad_size);
+}
+// potrs_batch (sycl::buffer, std::complex<double>): forwards to the rocSOLVER backend.
+static inline void potrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+    std::int64_t stride_a, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
+    std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
+                                                 stride_b, batch_size, scratchpad, scratchpad_size);
+}
+// ungqr_batch, sycl::buffer overload for std::complex<float> data.
+// Forwards every argument unchanged to the rocSOLVER backend on the selector's queue.
+static inline void ungqr_batch(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, std::int64_t k,
+    sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
+    sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau, std::int64_t batch_size,
+    sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::ungqr_batch(
+        queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
+}
+// ungqr_batch, sycl::buffer overload for std::complex<double> data.
+// Forwards every argument unchanged to the rocSOLVER backend on the selector's queue.
+static inline void ungqr_batch(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, std::int64_t k,
+    sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
+    sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau, std::int64_t batch_size,
+    sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
+    auto&& queue = selector.get_queue();
+    oneapi::math::lapack::rocsolver::ungqr_batch(
+        queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
+}
+
+// USM APIs
+
+static inline sycl::event gebrd(  // USM overload, std::complex<float> data
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, float* d, float* e, std::complex<float>* tauq,
+    std::complex<float>* taup, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
+                                                  taup, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gebrd(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, double* a,
+    std::int64_t lda, double* d, double* e, double* tauq, double* taup, double* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    return oneapi::math::lapack::rocsolver::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gebrd(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, float* a,
+    std::int64_t lda, float* d, float* e, float* tauq, float* taup, float* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    return oneapi::math::lapack::rocsolver::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gebrd(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, double* d, double* e, std::complex<double>* tauq,
+    std::complex<double>* taup, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    return oneapi::math::lapack::rocsolver::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gerqf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, float* a,
+    std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::gerqf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gerqf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, double* a,
+    std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::gerqf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gerqf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::gerqf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event gerqf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
+    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::gerqf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, double* a,
+    std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, float* a,
+    std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
+    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
+}
+// getrf (USM, std::complex<float>): forwards to the rocSOLVER backend.
+static inline sycl::event getrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf(
+        selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, double* a,
+    std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n, float* a,
+    std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf(  // pass-through on the selector's queue
+        selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+// getrf (USM, std::complex<double>): forwards to the rocSOLVER backend.
+static inline sycl::event getrf(
+    backend_selector<backend::rocsolver> selector, std::int64_t m, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf(
+        selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getri(
+    backend_selector<backend::rocsolver> selector, std::int64_t n, std::complex<float>* a,
+    std::int64_t lda, std::int64_t* ipiv, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri(  // pass-through on the selector's queue
+        selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getri(
+    backend_selector<backend::rocsolver> selector, std::int64_t n, double* a, std::int64_t lda,
+    std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri(  // pass-through on the selector's queue
+        selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getri(
+    backend_selector<backend::rocsolver> selector, std::int64_t n, float* a, std::int64_t lda,
+    std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri(  // pass-through on the selector's queue
+        selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getri(
+    backend_selector<backend::rocsolver> selector, std::int64_t n, std::complex<double>* a,
+    std::int64_t lda, std::int64_t* ipiv, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri(  // pass-through on the selector's queue
+        selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+// getrs (USM, std::complex<float>): forwards to the rocSOLVER backend.
+static inline sycl::event getrs(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+    std::complex<float>* b, std::int64_t ldb, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::getrs(
+        queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
+    std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    return oneapi::math::lapack::rocsolver::getrs(
+        queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
+    std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();  // queue carried by the rocSOLVER selector
+    return oneapi::math::lapack::rocsolver::getrs(
+        queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
+}
+// getrs (USM, std::complex<double>): forwards to the rocSOLVER backend.
+static inline sycl::event getrs(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+    std::complex<double>* b, std::int64_t ldb, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::getrs(
+        queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
+}
+// gesvd (USM, double): forwards to the rocSOLVER backend.
+static inline sycl::event gesvd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+    double* s, double* u, std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
+                                                  ldvt, scratchpad, scratchpad_size, dependencies);
+}
+// gesvd (USM, float): forwards to the rocSOLVER backend.
+static inline sycl::event gesvd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+    float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
+                                                  ldvt, scratchpad, scratchpad_size, dependencies);
+}
+// gesvd (USM, std::complex<float>): forwards to the rocSOLVER backend.
+static inline sycl::event gesvd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex<float>* a,
+    std::int64_t lda, float* s, std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
+    std::int64_t ldvt, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
+                                                  ldvt, scratchpad, scratchpad_size, dependencies);
+}
+// gesvd (USM, std::complex<double>): forwards to the rocSOLVER backend.
+static inline sycl::event gesvd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::jobsvd jobu,
+    oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex<double>* a,
+    std::int64_t lda, double* s, std::complex<double>* u, std::int64_t ldu,
+    std::complex<double>* vt, std::int64_t ldvt, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
+                                                  ldvt, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event heevd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::job jobz, oneapi::math::uplo uplo,
+    std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
+    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::heevd(  // pass-through on the selector's queue
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event heevd(
+    backend_selector<backend::rocsolver> selector, oneapi::math::job jobz, oneapi::math::uplo uplo,
+    std::int64_t n, std::complex<double>* a, std::int64_t lda, double* w,
+    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::heevd(  // pass-through on the selector's queue
+        selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
+}
+// hegvd (USM, std::complex<float>): forwards to the rocSOLVER backend.
+static inline sycl::event hegvd(
+    backend_selector<backend::rocsolver> selector, std::int64_t itype, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
+    std::complex<float>* b, std::int64_t ldb, float* w, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::hegvd(
+        queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
+}
+// hegvd (USM, std::complex<double>): forwards to the rocSOLVER backend.
+static inline sycl::event hegvd(
+    backend_selector<backend::rocsolver> selector, std::int64_t itype, oneapi::math::job jobz,
+    oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a, std::int64_t lda,
+    std::complex<double>* b, std::int64_t ldb, double* w, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    auto&& queue = selector.get_queue();
+    return oneapi::math::lapack::rocsolver::hegvd(
+        queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event hetrd(  // USM overload, std::complex<float> data
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, float* d, float* e, std::complex<float>* tau,
+    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event hetrd(  // USM overload, std::complex<double> data
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, double* d, double* e, std::complex<double>* tau,
+    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}
+// hetrf (USM, std::complex<float>): forwards to the rocSOLVER backend.
+static inline sycl::event hetrf(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::hetrf(
+        selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+// hetrf (USM, std::complex<double>): forwards to the rocSOLVER backend.
+static inline sycl::event hetrf(
+    backend_selector<backend::rocsolver> selector, oneapi::math::uplo uplo, std::int64_t n,
+    std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::hetrf(
+        selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event orgbr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                std::int64_t k, float* a, std::int64_t lda, float* tau,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgbr (float): forwards to rocsolver::orgbr on selector.get_queue()
+static inline sycl::event orgbr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgbr (double): forwards to rocsolver::orgbr on selector.get_queue()
+static inline sycl::event orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
+                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgqr (double): forwards to rocsolver::orgqr on selector.get_queue()
+static inline sycl::event orgqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgqr (float): forwards to rocsolver::orgqr on selector.get_queue()
+static inline sycl::event orgtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgtr (float): forwards to rocsolver::orgtr on selector.get_queue()
+static inline sycl::event orgtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* tau, double* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // orgtr (double): forwards to rocsolver::orgtr on selector.get_queue()
+static inline sycl::event ormtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormtr (float): forwards to rocsolver::ormtr on selector.get_queue()
+static inline sycl::event ormtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                double* a, std::int64_t lda, double* tau, double* c,
+                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormtr (double): forwards to rocsolver::ormtr on selector.get_queue()
+static inline sycl::event ormrq(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
+                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormrq (float): forwards to rocsolver::ormrq on selector.get_queue()
+static inline sycl::event ormrq(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
+                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormrq (double): forwards to rocsolver::ormrq on selector.get_queue()
+static inline sycl::event ormqr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
+                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormqr (double): forwards to rocsolver::ormqr on selector.get_queue()
+static inline sycl::event ormqr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
+                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // ormqr (float): forwards to rocsolver::ormqr on selector.get_queue()
+static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potrf (float): forwards to rocsolver::potrf on selector.get_queue()
+static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potrf (double): forwards to rocsolver::potrf on selector.get_queue()
+static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potrf (std::complex<float>): forwards to rocsolver::potrf on selector.get_queue()
+static inline sycl::event potrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potrf (std::complex<double>): forwards to rocsolver::potrf on selector.get_queue()
+static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potri (float): forwards to rocsolver::potri on selector.get_queue()
+static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potri (double): forwards to rocsolver::potri on selector.get_queue()
+static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potri (std::complex<float>): forwards to rocsolver::potri on selector.get_queue()
+static inline sycl::event potri(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
+                                                  scratchpad_size, dependencies);
+}  // potri (std::complex<double>): forwards to rocsolver::potri on selector.get_queue()
+static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+                                                  ldb, scratchpad, scratchpad_size, dependencies);
+}  // potrs (float): forwards to rocsolver::potrs on selector.get_queue()
+static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+                                                  ldb, scratchpad, scratchpad_size, dependencies);
+}  // potrs (double): forwards to rocsolver::potrs on selector.get_queue()
+static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                                std::int64_t ldb, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+                                                  ldb, scratchpad, scratchpad_size, dependencies);
+}  // potrs (std::complex<float>): forwards to rocsolver::potrs on selector.get_queue()
+static inline sycl::event potrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
+                                std::int64_t ldb, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b,
+                                                  ldb, scratchpad, scratchpad_size, dependencies);
+}  // potrs (std::complex<double>): forwards to rocsolver::potrs on selector.get_queue()
+static inline sycl::event syevd(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                double* a, std::int64_t lda, double* w, double* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // syevd (double): forwards to rocsolver::syevd on selector.get_queue()
+static inline sycl::event syevd(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                float* a, std::int64_t lda, float* w, float* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // syevd (float): forwards to rocsolver::syevd on selector.get_queue()
+static inline sycl::event sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a,
+                                                  lda, b, ldb, w, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // sygvd (double): forwards to rocsolver::sygvd on selector.get_queue()
+static inline sycl::event sygvd(backend_selector<backend::rocsolver> selector, std::int64_t itype,
+                                oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a,
+                                                  lda, b, ldb, w, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // sygvd (float): forwards to rocsolver::sygvd on selector.get_queue()
+static inline sycl::event sytrd(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, double* d, double* e, double* tau,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrd (double): forwards to rocsolver::sytrd on selector.get_queue()
+static inline sycl::event sytrd(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                float* d, float* e, float* tau, float* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrd (float): forwards to rocsolver::sytrd on selector.get_queue()
+static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
+                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrf (float): forwards to rocsolver::sytrf on selector.get_queue()
+static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrf (double): forwards to rocsolver::sytrf on selector.get_queue()
+static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, std::int64_t* ipiv,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrf (std::complex<float>): forwards to rocsolver::sytrf on selector.get_queue()
+static inline sycl::event sytrf(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, std::int64_t* ipiv,
+                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // sytrf (std::complex<double>): forwards to rocsolver::sytrf on selector.get_queue()
+static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                                std::int64_t ldb, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // trtrs (std::complex<float>): forwards to rocsolver::trtrs on selector.get_queue()
+static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
+                                double* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // trtrs (double): forwards to rocsolver::trtrs on selector.get_queue()
+static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                float* a, std::int64_t lda, float* b, std::int64_t ldb,
+                                float* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // trtrs (float): forwards to rocsolver::trtrs on selector.get_queue()
+static inline sycl::event trtrs(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
+                                std::int64_t ldb, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs,
+                                                  a, lda, b, ldb, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // trtrs (std::complex<double>): forwards to rocsolver::trtrs on selector.get_queue()
+static inline sycl::event ungbr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                std::complex<float>* tau, std::complex<float>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungbr (std::complex<float>): forwards to rocsolver::ungbr on selector.get_queue()
+static inline sycl::event ungbr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::generate vec, std::int64_t m, std::int64_t n,
+                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* tau, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungbr (std::complex<double>): forwards to rocsolver::ungbr on selector.get_queue()
+static inline sycl::event ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                std::int64_t n, std::int64_t k, std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* tau,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungqr (std::complex<float>): forwards to rocsolver::ungqr on selector.get_queue()
+static inline sycl::event ungqr(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* tau,
+                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungqr (std::complex<double>): forwards to rocsolver::ungqr on selector.get_queue()
+static inline sycl::event ungtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                std::int64_t lda, std::complex<float>* tau,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungtr (std::complex<float>): forwards to rocsolver::ungtr on selector.get_queue()
+static inline sycl::event ungtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                std::int64_t lda, std::complex<double>* tau,
+                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
+                                                  scratchpad, scratchpad_size, dependencies);
+}  // ungtr (std::complex<double>): forwards to rocsolver::ungtr on selector.get_queue()
+static inline sycl::event unmrq(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                std::complex<float>* c, std::int64_t ldc,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // unmrq (std::complex<float>): forwards to rocsolver::unmrq on selector.get_queue()
+static inline sycl::event unmrq(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* tau, std::complex<double>* c,
+                                std::int64_t ldc, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // unmrq (std::complex<double>): forwards to rocsolver::unmrq on selector.get_queue()
+static inline sycl::event unmqr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                std::complex<float>* c, std::int64_t ldc,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}  // unmqr (std::complex<float>): forwards to rocsolver::unmqr on selector.get_queue()
+static inline sycl::event unmqr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::transpose trans,
+                                std::int64_t m, std::int64_t n, std::int64_t k,
+                                std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* tau, std::complex<double>* c,
+                                std::int64_t ldc, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}
+static inline sycl::event unmtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                std::complex<float>* c, std::int64_t ldc,
+                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}
+static inline sycl::event unmtr(backend_selector<backend::rocsolver> selector,
+                                oneapi::math::side side, oneapi::math::uplo uplo,
+                                oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                std::complex<double>* a, std::int64_t lda,
+                                std::complex<double>* tau, std::complex<double>* c,
+                                std::int64_t ldc, std::complex<double>* scratchpad,
+                                std::int64_t scratchpad_size,
+                                const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
+                                                  lda, tau, c, ldc, scratchpad, scratchpad_size,
+                                                  dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, float* a, std::int64_t lda,
+                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, double* a, std::int64_t lda,
+                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<float>* tau,
+                                      std::int64_t stride_tau, std::int64_t batch_size,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<double>* tau,
+                                      std::int64_t stride_tau, std::int64_t batch_size,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, float** a,
+                                      std::int64_t* lda, float** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, double** a,
+                                      std::int64_t* lda, double** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
+                                      std::int64_t* lda, std::complex<float>** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event geqrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
+                                      std::int64_t* lda, std::complex<double>** tau,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, float* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      float* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, ipiv, stride_ipiv, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, double* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      double* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, ipiv, stride_ipiv, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, ipiv, stride_ipiv, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda,
+                                                        stride_a, ipiv, stride_ipiv, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, float** a,
+                                      std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      float* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, double** a,
+                                      std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      double* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
+                                      std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getrf_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
+                                      std::int64_t* lda, std::int64_t** ipiv,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                                      float* a, std::int64_t lda, std::int64_t stride_a,
+                                      std::int64_t* ipiv, std::int64_t stride_ipiv,
+                                      std::int64_t batch_size, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                        ipiv, stride_ipiv, batch_size, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                                      double* a, std::int64_t lda, std::int64_t stride_a,
+                                      std::int64_t* ipiv, std::int64_t stride_ipiv,
+                                      std::int64_t batch_size, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                        ipiv, stride_ipiv, batch_size, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                                      std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                        ipiv, stride_ipiv, batch_size, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                                      std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, std::int64_t batch_size,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
+                                                        ipiv, stride_ipiv, batch_size, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* n, float** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* n, double** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getri_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
+                                                        group_count, group_sizes, scratchpad,
+                                                        scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::transpose trans, std::int64_t n,
+                                      std::int64_t nrhs, float* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      float* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(
+        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::transpose trans, std::int64_t n,
+                                      std::int64_t nrhs, double* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t* ipiv,
+                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
+                                      std::int64_t stride_b, std::int64_t batch_size,
+                                      double* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(
+        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, std::complex<float>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(
+        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose trans, std::int64_t n,
+    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
+    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(
+        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* nrhs, float** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, float** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      float* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a,
+                                                        lda, ipiv, b, ldb, group_count, group_sizes,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* nrhs, double** a, std::int64_t* lda,
+                                      std::int64_t** ipiv, double** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      double* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a,
+                                                        lda, ipiv, b, ldb, group_count, group_sizes,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::transpose* trans, std::int64_t* n,
+                                      std::int64_t* nrhs, std::complex<float>** a,
+                                      std::int64_t* lda, std::int64_t** ipiv,
+                                      std::complex<float>** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a,
+                                                        lda, ipiv, b, ldb, group_count, group_sizes,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event getrs_batch(
+    backend_selector<backend::rocsolver> selector, oneapi::math::transpose* trans, std::int64_t* n,
+    std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
+    std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
+    std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a,
+                                                        lda, ipiv, b, ldb, group_count, group_sizes,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
+                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
+                                                        stride_a, tau, stride_tau, batch_size,
+                                                        scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch orgqr_batch (float): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a,
+                                      std::int64_t* lda, float** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch orgqr_batch (double): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event orgqr_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a,
+                                      std::int64_t* lda, double** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch potrf_batch (float): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::int64_t batch_size, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
+                                     scratchpad_size, dependencies);
+}
+// Strided-batch potrf_batch (double): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::int64_t batch_size, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
+                                     scratchpad_size, dependencies);
+}
+// Strided-batch potrf_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n,
+                                      std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t batch_size,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
+                                     scratchpad_size, dependencies);
+}
+// Strided-batch potrf_batch (std::complex<double>): calls the rocSOLVER implementation on the
+// queue wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n,
+                                      std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::int64_t batch_size,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
+                                     scratchpad_size, dependencies);
+}
+// Group-batch potrf_batch (float): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, float** a,
+                                      std::int64_t* lda, std::int64_t group_count,
+                                      std::int64_t* group_sizes, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrf_batch (double): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, double** a,
+                                      std::int64_t* lda, std::int64_t group_count,
+                                      std::int64_t* group_sizes, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrf_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
+                                      std::complex<float>** a, std::int64_t* lda,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrf_batch (std::complex<double>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrf_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n,
+                                      std::complex<double>** a, std::int64_t* lda,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch potrs_batch (float): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      float* a, std::int64_t lda, std::int64_t stride_a, float* b,
+                                      std::int64_t ldb, std::int64_t stride_b,
+                                      std::int64_t batch_size, float* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch potrs_batch (double): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      double* a, std::int64_t lda, std::int64_t stride_a, double* b,
+                                      std::int64_t ldb, std::int64_t stride_b,
+                                      std::int64_t batch_size, double* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch potrs_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      std::complex<float>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<float>* b,
+                                      std::int64_t ldb, std::int64_t stride_b,
+                                      std::int64_t batch_size, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch potrs_batch (std::complex<double>): calls the rocSOLVER implementation on the
+// queue wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                      std::complex<double>* a, std::int64_t lda,
+                                      std::int64_t stride_a, std::complex<double>* b,
+                                      std::int64_t ldb, std::int64_t stride_b,
+                                      std::int64_t batch_size, std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrs_batch (float): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      float* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
+                                     group_sizes, scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrs_batch (double): calls the rocSOLVER implementation on the queue wrapped by
+// `selector`, passing every argument through unchanged, and returns the event it produces.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      double* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
+                                     group_sizes, scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrs_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
+                                     group_sizes, scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch potrs_batch (std::complex<double>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event potrs_batch(backend_selector<backend::rocsolver> selector,
+                                      oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+                                      std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>** b, std::int64_t* ldb,
+                                      std::int64_t group_count, std::int64_t* group_sizes,
+                                      std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
+                                     group_sizes, scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch ungqr_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, std::complex<float>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<float>* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Strided-batch ungqr_batch (std::complex<double>): calls the rocSOLVER implementation on the
+// queue wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                      std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                      std::int64_t lda, std::int64_t stride_a,
+                                      std::complex<double>* tau, std::int64_t stride_tau,
+                                      std::int64_t batch_size, std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                     batch_size, scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch ungqr_batch (std::complex<float>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                      std::complex<float>** a, std::int64_t* lda,
+                                      std::complex<float>** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+// Group-batch ungqr_batch (std::complex<double>): calls the rocSOLVER implementation on the queue
+// wrapped by `selector`, passing every argument through unchanged, and returns its event.
+static inline sycl::event ungqr_batch(backend_selector<backend::rocsolver> selector,
+                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                      std::complex<double>** a, std::int64_t* lda,
+                                      std::complex<double>** tau, std::int64_t group_count,
+                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                      std::int64_t scratchpad_size,
+                                      const std::vector<sycl::event>& dependencies = {}) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                     scratchpad, scratchpad_size, dependencies);
+}
+
+// SCRATCHPAD APIs
+// Returns whatever the rocSOLVER backend's gebrd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, lda.
+template <typename fp_type>
+std::int64_t gebrd_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::gebrd_scratchpad_size<fp_type>(queue, m, n, lda);
+}
+// Returns whatever the rocSOLVER backend's gerqf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, lda.
+template <typename fp_type>
+std::int64_t gerqf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::gerqf_scratchpad_size<fp_type>(queue, m, n, lda);
+}
+// Returns whatever the rocSOLVER backend's geqrf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, lda.
+template <typename fp_type>
+std::int64_t geqrf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::geqrf_scratchpad_size<fp_type>(queue, m, n, lda);
+}
+// Returns whatever the rocSOLVER backend's gesvd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given job flags and dimensions.
+template <typename fp_type>
+std::int64_t gesvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                                   std::int64_t m, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldu, std::int64_t ldvt) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::gesvd_scratchpad_size<fp_type>(queue, jobu, jobvt, m, n, lda, ldu, ldvt);
+}
+// Returns whatever the rocSOLVER backend's getrf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, lda.
+template <typename fp_type>
+std::int64_t getrf_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::getrf_scratchpad_size<fp_type>(queue, m, n, lda);
+}
+// Returns whatever the rocSOLVER backend's getri_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given n, lda.
+template <typename fp_type>
+std::int64_t getri_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t n,
+                                   std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::getri_scratchpad_size<fp_type>(queue, n, lda);
+}
+// Returns whatever the rocSOLVER backend's getrs_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given trans, n, nrhs, lda, ldb.
+template <typename fp_type>
+std::int64_t getrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::getrs_scratchpad_size<fp_type>(queue, trans, n, nrhs, lda, ldb);
+}
+// Returns whatever the rocSOLVER backend's heevd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given jobz, uplo, n, lda.
+template <typename fp_type>
+std::int64_t heevd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::heevd_scratchpad_size<fp_type>(queue, jobz, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's hegvd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given itype, jobz, uplo, n, lda, ldb.
+template <typename fp_type>
+std::int64_t hegvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::hegvd_scratchpad_size<fp_type>(queue, itype, jobz, uplo, n, lda, ldb);
+}
+// Returns whatever the rocSOLVER backend's hetrd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t hetrd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::hetrd_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's hetrf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t hetrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::hetrf_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's orgbr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given vect, m, n, k, lda.
+template <typename fp_type>
+std::int64_t orgbr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::orgbr_scratchpad_size<fp_type>(queue, vect, m, n, k, lda);
+}
+// Returns whatever the rocSOLVER backend's orgtr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t orgtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::orgtr_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's orgqr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, k, lda.
+template <typename fp_type>
+std::int64_t orgqr_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::orgqr_scratchpad_size<fp_type>(queue, m, n, k, lda);
+}
+// Returns whatever the rocSOLVER backend's ormrq_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given side, trans, m, n, k, lda, ldc.
+template <typename fp_type>
+std::int64_t ormrq_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ormrq_scratchpad_size<fp_type>(queue, side, trans, m, n, k, lda, ldc);
+}
+// Returns whatever the rocSOLVER backend's ormqr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given side, trans, m, n, k, lda, ldc.
+template <typename fp_type>
+std::int64_t ormqr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ormqr_scratchpad_size<fp_type>(queue, side, trans, m, n, k, lda, ldc);
+}
+// Returns whatever the rocSOLVER backend's ormtr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given side, uplo, trans, m, n, lda, ldc.
+template <typename fp_type>
+std::int64_t ormtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldc) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ormtr_scratchpad_size<fp_type>(queue, side, uplo, trans, m, n, lda, ldc);
+}
+// Returns whatever the rocSOLVER backend's potrf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t potrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrf_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's potrs_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, nrhs, lda, ldb.
+template <typename fp_type>
+std::int64_t potrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potrs_scratchpad_size<fp_type>(queue, uplo, n, nrhs, lda, ldb);
+}
+// Returns whatever the rocSOLVER backend's potri_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t potri_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::potri_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's sytrf_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t sytrf_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::sytrf_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's syevd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given jobz, uplo, n, lda.
+template <typename fp_type>
+std::int64_t syevd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                                   std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::syevd_scratchpad_size<fp_type>(queue, jobz, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's sygvd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given itype, jobz, uplo, n, lda, ldb.
+template <typename fp_type>
+std::int64_t sygvd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                   std::int64_t ldb) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::sygvd_scratchpad_size<fp_type>(queue, itype, jobz, uplo, n, lda, ldb);
+}
+// Returns whatever the rocSOLVER backend's sytrd_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t sytrd_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::sytrd_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's trtrs_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, trans, diag, n, nrhs, lda, ldb.
+template <typename fp_type>
+std::int64_t trtrs_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                   oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                                   std::int64_t lda, std::int64_t ldb) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::trtrs_scratchpad_size<fp_type>(queue, uplo, trans, diag, n, nrhs, lda,
+                                                        ldb);
+}
+// Returns whatever the rocSOLVER backend's ungbr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given vect, m, n, k, lda.
+template <typename fp_type>
+std::int64_t ungbr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::generate vect, std::int64_t m, std::int64_t n,
+                                   std::int64_t k, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungbr_scratchpad_size<fp_type>(queue, vect, m, n, k, lda);
+}
+// Returns whatever the rocSOLVER backend's ungqr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given m, n, k, lda.
+template <typename fp_type>
+std::int64_t ungqr_scratchpad_size(backend_selector<backend::rocsolver> selector, std::int64_t m,
+                                   std::int64_t n, std::int64_t k, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungqr_scratchpad_size<fp_type>(queue, m, n, k, lda);
+}
+// Returns whatever the rocSOLVER backend's ungtr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given uplo, n, lda.
+template <typename fp_type>
+std::int64_t ungtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::ungtr_scratchpad_size<fp_type>(queue, uplo, n, lda);
+}
+// Returns whatever the rocSOLVER backend's unmrq_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given side, trans, m, n, k, lda, ldc.
+template <typename fp_type>
+std::int64_t unmrq_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::unmrq_scratchpad_size<fp_type>(queue, side, trans, m, n, k, lda, ldc);
+}
+// Returns whatever the rocSOLVER backend's unmqr_scratchpad_size<fp_type> reports for the queue
+// wrapped by `selector` and the given side, trans, m, n, k, lda, ldc.
+template <typename fp_type>
+std::int64_t unmqr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::transpose trans,
+                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+                                   std::int64_t ldc) {
+    namespace backend_impl = oneapi::math::lapack::rocsolver;
+    auto&& queue = selector.get_queue();
+    return backend_impl::unmqr_scratchpad_size<fp_type>(queue, side, trans, m, n, k, lda, ldc);
+}
+template <typename fp_type>
+std::int64_t unmtr_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                   oneapi::math::side side, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                   std::int64_t lda, std::int64_t ldc) {
+    return oneapi::math::lapack::rocsolver::unmtr_scratchpad_size<fp_type>(
+        selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
+}
+template <typename fp_type>
+std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t m, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_ipiv,
+                                         std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda, stride_a, stride_ipiv, batch_size);
+}
+template <typename fp_type>
+std::int64_t getri_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_ipiv, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), n, lda, stride_a, stride_ipiv, batch_size);
+}
+template <typename fp_type>
+std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::transpose trans, std::int64_t n,
+                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_ipiv, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b,
+        batch_size);
+}
+template <typename fp_type>
+std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t m, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t stride_tau,
+                                         std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda, stride_a, stride_tau, batch_size);
+}
+template <typename fp_type>
+std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda, stride_a, batch_size);
+}
+template <typename fp_type>
+std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                                         std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
+                                         std::int64_t stride_b, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+}
+template <typename fp_type>
+std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t m, std::int64_t n, std::int64_t k,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
+}
+template <typename fp_type>
+std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t m, std::int64_t n, std::int64_t k,
+                                         std::int64_t lda, std::int64_t stride_a,
+                                         std::int64_t stride_tau, std::int64_t batch_size) {
+    return oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
+}
+template <typename fp_type>
+std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t getri_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), n, lda, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::transpose* trans, std::int64_t* n,
+                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+                                         std::int64_t group_count, std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
+                                         std::int64_t group_count, std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, lda, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, lda, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         oneapi::math::uplo* uplo, std::int64_t* n,
+                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+                                         std::int64_t group_count, std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes);
+}
+template <typename fp_type>
+std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::rocsolver> selector,
+                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
+                                         std::int64_t* lda, std::int64_t group_count,
+                                         std::int64_t* group_sizes) {
+    return oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<fp_type>(
+        selector.get_queue(), m, n, k, lda, group_count, group_sizes);
+}
diff --git a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp
similarity index 81%
rename from include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp
rename to include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp
index f7e83f9a9..905c46085 100644
--- a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp
+++ b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp
@@ -19,8 +19,8 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_LAPACK_ROCSOLVER_HPP_
-#define _ONEMKL_LAPACK_ROCSOLVER_HPP_
+#ifndef _ONEMATH_LAPACK_ROCSOLVER_HPP_
+#define _ONEMATH_LAPACK_ROCSOLVER_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -31,19 +31,19 @@
 #include <cstdint>
 #include <string>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/export.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
-#include "onemkl_lapack_rocsolver.hxx"
+#include "onemath_lapack_rocsolver.hxx"
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_LAPACK_ROCSOLVER_HPP_
+#endif //_ONEMATH_LAPACK_ROCSOLVER_HPP_
diff --git a/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx
new file mode 100644
index 000000000..f8ebfd700
--- /dev/null
+++ b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx
@@ -0,0 +1,1856 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Copyright 2022 Intel Corporation
+*
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+// Buffer APIs
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tauq,
+                          sycl::buffer<std::complex<float>>& taup,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tauq,
+                          sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tauq,
+                          sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tauq,
+                          sycl::buffer<std::complex<double>>& taup,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
+                          std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
+                          std::int64_t ldb, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b,
+                          std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
+                          sycl::buffer<double>& vt, std::int64_t ldvt,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
+                          sycl::buffer<float>& vt, std::int64_t ldvt,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<float>& s,
+                          sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+                          std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<double>& s,
+                          sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
+                          sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<float>& d, sycl::buffer<float>& e,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<double>& d, sycl::buffer<double>& e,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// potri, sycl::buffer overloads for float, double and both std::complex variants.
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// potrs, sycl::buffer overloads (float, double, both std::complex); buffers a (lda) and b (ldb).
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// syevd, sycl::buffer overloads for double and float; a, w and scratchpad share the element type.
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                          std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+// sygvd, sycl::buffer overloads for double and float; itype is passed as a plain std::int64_t.
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+                          std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                          oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+                          std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
+                          sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+// sytrd, sycl::buffer overloads for double and float; a, d, e and tau share one element type.
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
+                          sycl::buffer<double>& e, sycl::buffer<double>& tau,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
+                          sycl::buffer<float>& e, sycl::buffer<float>& tau,
+                          sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+// sytrf, sycl::buffer overloads (float, double, both std::complex); ipiv is always std::int64_t.
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::int64_t>& ipiv,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// trtrs, sycl::buffer overloads (float, double, both std::complex); takes uplo, trans and diag.
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                          sycl::buffer<double>& b, std::int64_t ldb,
+                          sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                          sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+                          std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// ungbr, sycl::buffer overloads for std::complex<float> and std::complex<double> only.
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                          std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+                          std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// ungqr, sycl::buffer overloads for std::complex<float> and std::complex<double> only.
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// ungtr, sycl::buffer overloads for std::complex<float> and std::complex<double> only.
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmrq, sycl::buffer overloads for both std::complex types; takes buffers a, tau and c.
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmqr, sycl::buffer overloads for both std::complex types; takes buffers a, tau and c.
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// unmtr, sycl::buffer overloads for both std::complex types; takes side, uplo and trans (no k).
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<float>>& tau,
+                          sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<float>>& scratchpad,
+                          std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                          oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                          sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                          sycl::buffer<std::complex<double>>& tau,
+                          sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+                          sycl::buffer<std::complex<double>>& scratchpad,
+                          std::int64_t scratchpad_size);
+// geqrf_batch, sycl::buffer overloads (float, double, both std::complex); strided a and tau.
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getri_batch, sycl::buffer overloads (float, double, both std::complex); strided a and ipiv.
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getrs_batch, sycl::buffer overloads (float, double, both std::complex); strided a, ipiv and b.
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// getrf_batch, sycl::buffer overloads (float, double, both std::complex); strided a and ipiv.
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
+                                std::int64_t stride_ipiv, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// orgqr_batch, sycl::buffer overloads for float and double only; strided a and tau.
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<float>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<double>& tau, std::int64_t stride_tau,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+// potrf_batch, sycl::buffer overloads (float, double, both std::complex); one strided buffer a.
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
+                                std::int64_t batch_size, sycl::buffer<double>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// potrs_batch, sycl::buffer overloads (float, double, both std::complex); strided a and b.
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
+                                std::int64_t lda, std::int64_t stride_a,
+                                sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
+                                std::int64_t stride_b, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+// ungqr_batch, sycl::buffer overloads for both std::complex types only; strided a and tau.
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<float>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+                                std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
+                                std::int64_t stride_tau, std::int64_t batch_size,
+                                sycl::buffer<std::complex<double>>& scratchpad,
+                                std::int64_t scratchpad_size);
+
+// USM APIs
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tauq, std::complex<float>* taup,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// gebrd, pointer overloads returning sycl::event; d and e stay real-typed when a is complex.
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* d, double* e, double* tauq, double* taup,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* d, float* e, float* tauq, float* taup,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tauq, std::complex<double>* taup,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// gerqf, pointer overloads (float, double, both std::complex); `dependencies` defaults to {}.
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// geqrf, pointer overloads (float, double, both std::complex); `dependencies` defaults to {}.
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getrf, pointer overloads (float, double, both std::complex); ipiv is always std::int64_t*.
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
+                                 std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                 std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getri pointer overloads for float, double and their std::complex forms; ipiv is std::int64_t*.
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
+                                 std::int64_t* ipiv, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
+                                 std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// getrs pointer overloads (four element types); a, b and scratchpad share one element type.
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// gesvd pointer overloads; s is real (float*/double*) even when a, u and vt are std::complex.
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 double* a, std::int64_t lda, double* s, double* u,
+                                 std::int64_t ldu, double* vt, std::int64_t ldvt,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu,
+                                 float* vt, std::int64_t ldvt, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* s,
+                                 std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
+                                 std::int64_t ldvt, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                 oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* s,
+                                 std::complex<double>* u, std::int64_t ldu,
+                                 std::complex<double>* vt, std::int64_t ldvt,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// heevd pointer overloads (std::complex<float>/<double>); w uses the matching real type.
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hegvd pointer overloads (std::complex<float>/<double>); itype is an integer, w is real-typed.
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 float* w, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 double* w, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hetrd pointer overloads (std::complex<float>/<double>); d and e are real, tau is complex.
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, float* d, float* e,
+                                 std::complex<float>* tau, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, double* d, double* e,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// hetrf pointer overloads (std::complex<float>/<double>); ipiv is std::int64_t*.
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// orgbr pointer overloads (float, double); vec is a oneapi::math::generate value.
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
+                                 float* tau, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
+                                 double* tau, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// orgqr pointer overloads (double, float) taking m, n, k, a/lda, tau and a scratchpad.
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// orgtr pointer overloads (float, double) taking uplo, n, a/lda, tau and a scratchpad.
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* tau, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* tau, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ormtr pointer overloads (float, double); takes side, uplo and trans selectors plus a, tau, c.
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
+                                 float* tau, float* c, std::int64_t ldc, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
+                                 double* tau, double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ormrq pointer overloads (float, double); takes side and trans selectors plus m, n, k.
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ormqr pointer overloads (double, float); takes side and trans selectors plus m, n, k.
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, double* a, std::int64_t lda, double* tau,
+                                 double* c, std::int64_t ldc, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
+                                 std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// potrf pointer overloads for float, double and their std::complex forms; takes uplo, n, a, lda.
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// potri pointer overloads for float, double and their std::complex forms; takes uplo, n, a, lda.
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// potrs pointer overloads for float, double and their std::complex forms; takes nrhs, b, ldb.
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, float* a, std::int64_t lda, float* b,
+                                 std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, double* a, std::int64_t lda, double* b,
+                                 std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// syevd pointer overloads (double, float); a, w and scratchpad all use the same real type.
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* w, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* w, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// sygvd pointer overloads (double, float); itype is an integer, a, b and w share one real type.
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, double* a,
+                                 std::int64_t lda, double* b, std::int64_t ldb, double* w,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                 oneapi::math::uplo uplo, std::int64_t n, float* a,
+                                 std::int64_t lda, float* b, std::int64_t ldb, float* w,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// sytrd pointer overloads (double, float); d, e and tau use the same real type as a.
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, double* d, double* e, double* tau,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, float* d, float* e, float* tau,
+                                 float* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// sytrf pointer overloads for float, double and their std::complex forms; ipiv is std::int64_t*.
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 double* a, std::int64_t lda, std::int64_t* ipiv,
+                                 double* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// trtrs pointer overloads (four element types); takes uplo, trans and diag selectors.
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
+                                 double* b, std::int64_t ldb, double* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
+                                 float* b, std::int64_t ldb, float* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo,
+                                 oneapi::math::transpose trans, oneapi::math::diag diag,
+                                 std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ungbr pointer overloads (std::complex<float>/<double>); vec is a oneapi::math::generate value.
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
+                                 std::int64_t n, std::int64_t k, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ungqr pointer overloads (std::complex<float>/<double>) taking m, n, k, a/lda and tau.
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// ungtr pointer overloads (std::complex<float>/<double>) taking uplo, n, a/lda and tau.
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                 std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// unmrq pointer overloads (std::complex<float>/<double>); takes side and trans plus m, n, k.
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// unmqr pointer overloads (std::complex<float>/<double>); takes side and trans plus m, n, k.
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                 std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
+                                 std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+                                 std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                 std::complex<double>* tau, std::complex<double>* c,
+                                 std::int64_t ldc, std::complex<double>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// unmtr pointer overloads (std::complex<float>/<double>); takes side, uplo and trans selectors.
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<float>* a,
+                                 std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
+                                 std::int64_t ldc, std::complex<float>* scratchpad,
+                                 std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side,
+                                 oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                 std::int64_t m, std::int64_t n, std::complex<double>* a,
+                                 std::int64_t lda, std::complex<double>* tau,
+                                 std::complex<double>* c, std::int64_t ldc,
+                                 std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+                                 const std::vector<sycl::event>& dependencies = {});
+// geqrf_batch, stride-based overloads: a/tau pointers plus stride_a/stride_tau and batch_size.
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, float* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// geqrf_batch, group overloads: m/n/lda by pointer, a/tau pointer-to-pointer, group_count/sizes.
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrf_batch, stride-based overloads: a and ipiv pointers with stride_a/stride_ipiv, batch_size.
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t* ipiv, std::int64_t stride_ipiv,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrf_batch, group overloads: a/ipiv are pointer-to-pointer; adds group_count/group_sizes.
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       float** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       double** a, std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getri_batch, stride-based overloads: takes only n (no m); a/ipiv with stride_a/stride_ipiv.
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getri_batch, group overloads: n/lda by pointer, a/ipiv pointer-to-pointer, group_count/sizes.
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrs_batch, stride-based overloads: a, ipiv and b each carry their own stride; trans by value.
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, float* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, double* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans,
+                                       std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                       std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
+                                       std::int64_t stride_ipiv, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// getrs_batch, group overloads: trans/n/nrhs/lda/ldb by pointer; a, ipiv, b pointer-to-pointer.
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, float** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, std::int64_t** ipiv, double** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::int64_t** ipiv,
+                                       std::complex<float>** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans,
+                                       std::int64_t* n, std::int64_t* nrhs,
+                                       std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t** ipiv, std::complex<double>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// orgqr_batch, stride-based overloads: only float and double are declared; takes m, n and k.
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* tau, std::int64_t stride_tau,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// orgqr_batch, group overloads (float/double): m/n/k/lda by pointer, a/tau pointer-to-pointer.
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, float** a, std::int64_t* lda, float** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, double** a, std::int64_t* lda, double** tau,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrf_batch, stride-based overloads: uplo by value, a with stride_a; no tau or ipiv operand.
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       float* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, float* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       double* a, std::int64_t lda, std::int64_t stride_a,
+                                       std::int64_t batch_size, double* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrf_batch, group overloads: uplo/n/lda by pointer, a pointer-to-pointer, group_count/sizes.
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, float** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, double** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrs_batch, stride-based overloads: a and b each have their own ld and stride, plus nrhs.
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, float* a, std::int64_t lda,
+                                       std::int64_t stride_a, float* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, double* a, std::int64_t lda,
+                                       std::int64_t stride_a, double* b, std::int64_t ldb,
+                                       std::int64_t stride_b, std::int64_t batch_size,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                                       std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* b,
+                                       std::int64_t ldb, std::int64_t stride_b,
+                                       std::int64_t batch_size, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// potrs_batch, group overloads: uplo/n/nrhs/lda/ldb by pointer; a and b are pointer-to-pointer.
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, float** a,
+                                       std::int64_t* lda, float** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       float* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, double** a,
+                                       std::int64_t* lda, double** b, std::int64_t* ldb,
+                                       std::int64_t group_count, std::int64_t* group_sizes,
+                                       double* scratchpad, std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo,
+                                       std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
+                                       std::int64_t* lda, std::complex<float>** b,
+                                       std::int64_t* ldb, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event potrs_batch(
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    std::complex<double>** a, std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes, std::complex<double>* scratchpad,
+    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
+// ungqr_batch, stride-based overloads: complex types only; parameter list mirrors orgqr_batch.
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<float>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<float>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
+                                       std::int64_t k, std::complex<double>* a, std::int64_t lda,
+                                       std::int64_t stride_a, std::complex<double>* tau,
+                                       std::int64_t stride_tau, std::int64_t batch_size,
+                                       std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+// ungqr_batch, group overloads: complex types only; m/n/k/lda by pointer, a/tau pointer-to-pointer.
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
+                                       std::complex<float>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<float>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+                                       std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
+                                       std::complex<double>** tau, std::int64_t group_count,
+                                       std::int64_t* group_sizes, std::complex<double>* scratchpad,
+                                       std::int64_t scratchpad_size,
+                                       const std::vector<sycl::event>& dependencies = {});
+
+// SCRATCHPAD APIs
+
+template <typename T>  // T is not deducible; call as gebrd_scratchpad_size<T>(queue, m, n, lda)
+ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as gerqf_scratchpad_size<T>(queue, m, n, lda)
+ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as geqrf_scratchpad_size<T>(queue, m, n, lda)
+ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; takes both jobsvd flags and lda/ldu/ldvt
+ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                  oneapi::math::jobsvd jobvt, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldu, std::int64_t ldvt);
+
+template <typename T>  // T is not deducible; call as getrf_scratchpad_size<T>(queue, m, n, lda)
+ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as getri_scratchpad_size<T>(queue, n, lda)
+ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>  // T is not deducible; takes trans, n, nrhs and both leading dimensions
+ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans,
+                                                  std::int64_t n, std::int64_t nrhs,
+                                                  std::int64_t lda, std::int64_t ldb);
+
+template <typename T>  // T is not deducible; takes jobz and uplo selectors plus n and lda
+ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                                  oneapi::math::uplo uplo, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>  // T is not deducible; itype is a plain std::int64_t, not an enum
+ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
+                                                  oneapi::math::job jobz, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>  // T is not deducible; call as hetrd_scratchpad_size<T>(queue, uplo, n, lda)
+ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as hetrf_scratchpad_size<T>(queue, uplo, n, lda)
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; takes a generate selector plus m, n, k and lda
+ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect,
+                                                  std::int64_t m, std::int64_t n, std::int64_t k,
+                                                  std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as orgtr_scratchpad_size<T>(queue, uplo, n, lda)
+ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>  // T is not deducible; call as orgqr_scratchpad_size<T>(queue, m, n, k, lda)
+ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t nrhs,
+                                                  std::int64_t lda, std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz,
+                                                  oneapi::math::uplo uplo, std::int64_t n,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
+                                                  oneapi::math::job jobz, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans,
+                                                  oneapi::math::diag diag, std::int64_t n,
+                                                  std::int64_t nrhs, std::int64_t lda,
+                                                  std::int64_t ldb);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect,
+                                                  std::int64_t m, std::int64_t n, std::int64_t k,
+                                                  std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                  std::int64_t n, std::int64_t lda);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t k, std::int64_t lda,
+                                                  std::int64_t ldc);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side,
+                                                  oneapi::math::uplo uplo,
+                                                  oneapi::math::transpose trans, std::int64_t m,
+                                                  std::int64_t n, std::int64_t lda,
+                                                  std::int64_t ldc);
+// Strided batch overloads follow: scalar sizes plus stride_* and batch_size arguments.
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t stride_ipiv,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_ipiv,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size(
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
+    std::int64_t stride_b, std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        std::int64_t n, std::int64_t lda,
+                                                        std::int64_t stride_a,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        std::int64_t n, std::int64_t nrhs,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t ldb, std::int64_t stride_b,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t k,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
+                                                        std::int64_t n, std::int64_t k,
+                                                        std::int64_t lda, std::int64_t stride_a,
+                                                        std::int64_t stride_tau,
+                                                        std::int64_t batch_size);
+// Group batch overloads follow: pointer arguments plus group_count and group_sizes.
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* lda,
+                                                        std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size(
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* lda,
+                                                        std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* k,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue,
+                                                        oneapi::math::uplo* uplo, std::int64_t* n,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue,
+                                                        oneapi::math::uplo* uplo, std::int64_t* n,
+                                                        std::int64_t* nrhs, std::int64_t* lda,
+                                                        std::int64_t* ldb, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
+
+template <typename T>
+ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
+                                                        std::int64_t* n, std::int64_t* k,
+                                                        std::int64_t* lda, std::int64_t group_count,
+                                                        std::int64_t* group_sizes);
diff --git a/include/oneapi/mkl/lapack/exceptions.hpp b/include/oneapi/math/lapack/exceptions.hpp
similarity index 66%
rename from include/oneapi/mkl/lapack/exceptions.hpp
rename to include/oneapi/math/lapack/exceptions.hpp
index 59de3b4de..db9baf9d6 100644
--- a/include/oneapi/mkl/lapack/exceptions.hpp
+++ b/include/oneapi/math/lapack/exceptions.hpp
@@ -20,12 +20,12 @@
 #pragma once
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 
 class exception {
 public:
-    exception(oneapi::mkl::exception* _ex, std::int64_t info, std::int64_t detail = 0)
+    exception(oneapi::math::exception* _ex, std::int64_t info, std::int64_t detail = 0)
             : _info(info),
               _detail(detail),
               _ex(_ex) {}
@@ -42,27 +42,27 @@ class exception {
 private:
     std::int64_t _info;
     std::int64_t _detail;
-    mkl::exception* _ex;
+    math::exception* _ex;
 };
 
-class computation_error : public oneapi::mkl::computation_error,
-                          public oneapi::mkl::lapack::exception {
+class computation_error : public oneapi::math::computation_error,
+                          public oneapi::math::lapack::exception {
 public:
     computation_error(const std::string& function, const std::string& info, std::int64_t code)
-            : oneapi::mkl::computation_error("LAPACK", function, info),
-              oneapi::mkl::lapack::exception(this, code) {}
-    using oneapi::mkl::computation_error::what;
+            : oneapi::math::computation_error("LAPACK", function, info),
+              oneapi::math::lapack::exception(this, code) {}
+    using oneapi::math::computation_error::what;
 };
 
-class batch_error : public oneapi::mkl::batch_error, public oneapi::mkl::lapack::exception {
+class batch_error : public oneapi::math::batch_error, public oneapi::math::lapack::exception {
 public:
     batch_error(const std::string& function, const std::string& info, std::int64_t num_errors,
                 std::vector<std::int64_t> ids = {}, std::vector<std::exception_ptr> exceptions = {})
-            : oneapi::mkl::batch_error("LAPACK", function, info),
-              oneapi::mkl::lapack::exception(this, num_errors),
+            : oneapi::math::batch_error("LAPACK", function, info),
+              oneapi::math::lapack::exception(this, num_errors),
               _ids(ids),
               _exceptions(exceptions) {}
-    using oneapi::mkl::batch_error::what;
+    using oneapi::math::batch_error::what;
     const std::vector<std::int64_t>& ids() const {
         return _ids;
     }
@@ -75,16 +75,16 @@ class batch_error : public oneapi::mkl::batch_error, public oneapi::mkl::lapack:
     std::vector<std::exception_ptr> _exceptions;
 };
 
-class invalid_argument : public oneapi::mkl::invalid_argument,
-                         public oneapi::mkl::lapack::exception {
+class invalid_argument : public oneapi::math::invalid_argument,
+                         public oneapi::math::lapack::exception {
 public:
     invalid_argument(const std::string& function, const std::string& info,
                      std::int64_t arg_position = 0, std::int64_t detail = 0)
-            : oneapi::mkl::invalid_argument("LAPACK", function, info),
-              oneapi::mkl::lapack::exception(this, arg_position, detail) {}
-    using oneapi::mkl::invalid_argument::what;
+            : oneapi::math::invalid_argument("LAPACK", function, info),
+              oneapi::math::lapack::exception(this, arg_position, detail) {}
+    using oneapi::math::invalid_argument::what;
 };
 
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/include/oneapi/mkl/lapack/types.hpp b/include/oneapi/math/lapack/types.hpp
similarity index 98%
rename from include/oneapi/mkl/lapack/types.hpp
rename to include/oneapi/math/lapack/types.hpp
index 8dbe19e2e..e43721a50 100644
--- a/include/oneapi/mkl/lapack/types.hpp
+++ b/include/oneapi/math/lapack/types.hpp
@@ -29,7 +29,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace internal {
 
@@ -93,5 +93,5 @@ using is_complex_floating_point = typename enable_if<is_cfp<fp>::value>::type*;
 
 } // namespace internal
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/include/oneapi/math/rng.hpp b/include/oneapi/math/rng.hpp
new file mode 100644
index 000000000..71c9e57b9
--- /dev/null
+++ b/include/oneapi/math/rng.hpp
@@ -0,0 +1,41 @@
+/*******************************************************************************
+* Copyright 2020-2021 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_RNG_HPP_
+#define _ONEMATH_RNG_HPP_
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
+#include <complex>
+#include <cstdint>
+
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
+
+#include "oneapi/math/rng/predicates.hpp"
+#include "oneapi/math/rng/detail/rng_loader.hpp"
+
+#include "oneapi/math/rng/functions.hpp"
+#include "oneapi/math/rng/distributions.hpp"
+#include "oneapi/math/rng/engines.hpp"
+
+#endif // _ONEMATH_RNG_HPP_
diff --git a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp b/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp
similarity index 86%
rename from include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp
rename to include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp
index 062d21b61..7aa400bf8 100644
--- a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp
+++ b/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp
@@ -56,8 +56,8 @@
  * so.
  ******************************************************************************/
 
-#ifndef _ONEMKL_RNG_CURAND_HPP_
-#define _ONEMKL_RNG_CURAND_HPP_
+#ifndef _ONEMATH_RNG_CURAND_HPP_
+#define _ONEMATH_RNG_CURAND_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -66,29 +66,29 @@
 #endif
 #include <cstdint>
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace curand {
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
-                                                                          std::uint64_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                                            std::uint64_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                                     std::uint32_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                                       std::uint32_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(
     sycl::queue queue, std::initializer_list<std::uint32_t> seed);
 
 } // namespace curand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_CURAND_HPP_
+#endif //_ONEMATH_RNG_CURAND_HPP_
diff --git a/include/oneapi/mkl/rng/detail/engine_impl.hpp b/include/oneapi/math/rng/detail/engine_impl.hpp
similarity index 96%
rename from include/oneapi/mkl/rng/detail/engine_impl.hpp
rename to include/oneapi/math/rng/detail/engine_impl.hpp
index e76181e4c..2d3c2b562 100644
--- a/include/oneapi/mkl/rng/detail/engine_impl.hpp
+++ b/include/oneapi/math/rng/detail/engine_impl.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_ENGINE_IMPL_HPP_
-#define _ONEMKL_RNG_ENGINE_IMPL_HPP_
+#ifndef _ONEMATH_RNG_ENGINE_IMPL_HPP_
+#define _ONEMATH_RNG_ENGINE_IMPL_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,14 +27,14 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
-#include "oneapi/mkl/rng/distributions.hpp"
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/rng/distributions.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace detail {
 
@@ -191,7 +191,7 @@ class engine_impl {
 
 } // namespace detail
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_ENGINE_IMPL_HPP_
+#endif //_ONEMATH_RNG_ENGINE_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp b/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp
similarity index 67%
rename from include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp
rename to include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp
index e13b70148..9848a5cee 100644
--- a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp
+++ b/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_MKLCPU_HPP_
-#define _ONEMKL_RNG_MKLCPU_HPP_
+#ifndef _ONEMATH_RNG_MKLCPU_HPP_
+#define _ONEMATH_RNG_MKLCPU_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,29 +27,29 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklcpu {
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
-                                                                          std::uint64_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                                            std::uint64_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                                     std::uint32_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                                       std::uint32_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(
     sycl::queue queue, std::initializer_list<std::uint32_t> seed);
 
 } // namespace mklcpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_MKLCPU_HPP_
+#endif //_ONEMATH_RNG_MKLCPU_HPP_
diff --git a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp b/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp
similarity index 67%
rename from include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp
rename to include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp
index 4dd55f19b..f36fe3997 100644
--- a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp
+++ b/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_MKLGPU_HPP_
-#define _ONEMKL_RNG_MKLGPU_HPP_
+#ifndef _ONEMATH_RNG_MKLGPU_HPP_
+#define _ONEMATH_RNG_MKLGPU_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,29 +27,29 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklgpu {
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
-                                                                          std::uint64_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                                            std::uint64_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                                     std::uint32_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                                       std::uint32_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(
     sycl::queue queue, std::initializer_list<std::uint32_t> seed);
 
 } // namespace mklgpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_MKLGPU_HPP_
+#endif //_ONEMATH_RNG_MKLGPU_HPP_
diff --git a/include/oneapi/mkl/rng/detail/rng_loader.hpp b/include/oneapi/math/rng/detail/rng_loader.hpp
similarity index 51%
rename from include/oneapi/mkl/rng/detail/rng_loader.hpp
rename to include/oneapi/math/rng/detail/rng_loader.hpp
index dc85df5d6..1855d641e 100644
--- a/include/oneapi/mkl/rng/detail/rng_loader.hpp
+++ b/include/oneapi/math/rng/detail/rng_loader.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_LOADER_HPP_
-#define _ONEMKL_RNG_LOADER_HPP_
+#ifndef _ONEMATH_RNG_LOADER_HPP_
+#define _ONEMATH_RNG_LOADER_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,31 +27,31 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace detail {
 
-ONEMKL_EXPORT engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue,
-                                                std::uint64_t seed);
+ONEMATH_EXPORT engine_impl* create_philox4x32x10(oneapi::math::device libkey, sycl::queue queue,
+                                                 std::uint64_t seed);
 
-ONEMKL_EXPORT engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue,
-                                                std::initializer_list<std::uint64_t> seed);
+ONEMATH_EXPORT engine_impl* create_philox4x32x10(oneapi::math::device libkey, sycl::queue queue,
+                                                 std::initializer_list<std::uint64_t> seed);
 
-ONEMKL_EXPORT engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue,
-                                           std::uint32_t seed);
+ONEMATH_EXPORT engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue,
+                                            std::uint32_t seed);
 
-ONEMKL_EXPORT engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue,
-                                           std::initializer_list<std::uint32_t> seed);
+ONEMATH_EXPORT engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue,
+                                            std::initializer_list<std::uint32_t> seed);
 
 } // namespace detail
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_LOADER_HPP_
+#endif //_ONEMATH_RNG_LOADER_HPP_
diff --git a/include/oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp b/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp
similarity index 86%
rename from include/oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp
rename to include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp
index 791bcc13b..901f618f0 100644
--- a/include/oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp
+++ b/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp
@@ -58,8 +58,8 @@
  * so.
  ******************************************************************************/
 
-#ifndef _ONEMKL_RNG_ROCRAND_HPP_
-#define _ONEMKL_RNG_ROCRAND_HPP_
+#ifndef _ONEMATH_RNG_ROCRAND_HPP_
+#define _ONEMATH_RNG_ROCRAND_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -68,29 +68,29 @@
 #endif
 #include <cstdint>
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace rocrand {
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
-                                                                          std::uint64_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                                            std::uint64_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                                     std::uint32_t seed);
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                                       std::uint32_t seed);
 
-ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(
+ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(
     sycl::queue queue, std::initializer_list<std::uint32_t> seed);
 
 } // namespace rocrand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_ROCRAND_HPP_
+#endif //_ONEMATH_RNG_ROCRAND_HPP_
diff --git a/include/oneapi/math/rng/device.hpp b/include/oneapi/math/rng/device.hpp
new file mode 100644
index 000000000..fb0734175
--- /dev/null
+++ b/include/oneapi/math/rng/device.hpp
@@ -0,0 +1,28 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef ONEMATH_RNG_SYCL_DEVICE_HPP_
+#define ONEMATH_RNG_SYCL_DEVICE_HPP_
+
+#include "oneapi/math/rng/device/types.hpp"
+#include "oneapi/math/rng/device/functions.hpp"
+#include "oneapi/math/rng/device/distributions.hpp"
+#include "oneapi/math/rng/device/engines.hpp"
+
+#endif // ONEMATH_RNG_SYCL_DEVICE_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp b/include/oneapi/math/rng/device/detail/bernoulli_impl.hpp
similarity index 82%
rename from include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp
rename to include/oneapi/math/rng/device/detail/bernoulli_impl.hpp
index 83bb92f2d..0521d7f75 100644
--- a/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/bernoulli_impl.hpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_BERNOULLI_IMPL_HPP_
-#define _MKL_RNG_DEVICE_BERNOULLI_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_BERNOULLI_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_BERNOULLI_IMPL_HPP_
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename IntType, typename Method>
-class distribution_base<oneapi::mkl::rng::device::bernoulli<IntType, Method>> {
+class distribution_base<oneapi::math::rng::device::bernoulli<IntType, Method>> {
 public:
     struct param_type {
         param_type(float p) : p_(p) {}
@@ -33,7 +33,7 @@ class distribution_base<oneapi::mkl::rng::device::bernoulli<IntType, Method>> {
     distribution_base(float p) : p_(p) {
 #ifndef __SYCL_DEVICE_ONLY__
         if ((p > 1.0f) || (p < 0.0f)) {
-            throw oneapi::mkl::invalid_argument("rng", "bernoulli", "p < 0 || p > 1");
+            throw oneapi::math::invalid_argument("rng", "bernoulli", "p < 0 || p > 1");
         }
 #endif
     }
@@ -49,7 +49,7 @@ class distribution_base<oneapi::mkl::rng::device::bernoulli<IntType, Method>> {
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if ((pt.p_ > 1.0f) || (pt.p_ < 0.0f)) {
-            throw oneapi::mkl::invalid_argument("rng", "bernoulli", "p < 0 || p > 1");
+            throw oneapi::math::invalid_argument("rng", "bernoulli", "p < 0 || p > 1");
         }
 #endif
         p_ = pt.p_;
@@ -84,6 +84,6 @@ class distribution_base<oneapi::mkl::rng::device::bernoulli<IntType, Method>> {
     float p_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_BERNOULLI_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_BERNOULLI_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/beta_impl.hpp b/include/oneapi/math/rng/device/detail/beta_impl.hpp
similarity index 95%
rename from include/oneapi/mkl/rng/device/detail/beta_impl.hpp
rename to include/oneapi/math/rng/device/detail/beta_impl.hpp
index e412ee157..405b1c4d6 100644
--- a/include/oneapi/mkl/rng/device/detail/beta_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/beta_impl.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_BETA_IMPL_HPP_
-#define _MKL_RNG_DEVICE_BETA_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_BETA_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_BETA_IMPL_HPP_
 
 #include "vm_wrappers.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 enum class beta_algorithm { Johnk = 0, Atkinson1, Atkinson2, Atkinson3, Cheng, p1, q1, p1q1 };
 
@@ -54,7 +54,7 @@ inline DataType beta_c() {
 }
 
 template <typename RealType, typename Method>
-class distribution_base<oneapi::mkl::rng::device::beta<RealType, Method>> {
+class distribution_base<oneapi::math::rng::device::beta<RealType, Method>> {
 public:
     struct param_type {
         param_type(RealType p, RealType q, RealType a, RealType b) : p_(p), q_(q), a_(a), b_(b) {}
@@ -73,13 +73,13 @@ class distribution_base<oneapi::mkl::rng::device::beta<RealType, Method>> {
         set_algorithm();
 #ifndef __SYCL_DEVICE_ONLY__
         if (p <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "p <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "p <= 0");
         }
         else if (q <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "q <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "q <= 0");
         }
         else if (b <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "b <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "b <= 0");
         }
 #endif
     }
@@ -111,13 +111,13 @@ class distribution_base<oneapi::mkl::rng::device::beta<RealType, Method>> {
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.p_ <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "p <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "p <= 0");
         }
         else if (pt.q_ <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "q <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "q <= 0");
         }
         else if (pt.b_ <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "beta", "b <= 0");
+            throw oneapi::math::invalid_argument("rng", "beta", "b <= 0");
         }
 #endif
         p_ = pt.p_;
@@ -463,6 +463,6 @@ class distribution_base<oneapi::mkl::rng::device::beta<RealType, Method>> {
     beta_algorithm algorithm_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_BETA_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_BETA_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/bits_impl.hpp b/include/oneapi/math/rng/device/detail/bits_impl.hpp
similarity index 80%
rename from include/oneapi/mkl/rng/device/detail/bits_impl.hpp
rename to include/oneapi/math/rng/device/detail/bits_impl.hpp
index aa68956d6..398c25388 100644
--- a/include/oneapi/mkl/rng/device/detail/bits_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/bits_impl.hpp
@@ -17,15 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_BITS_IMPL_HPP_
-#define _MKL_RNG_DEVICE_BITS_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_BITS_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_BITS_IMPL_HPP_
 
 #include "engine_base.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename UIntType>
-class distribution_base<oneapi::mkl::rng::device::bits<UIntType>> {
+class distribution_base<oneapi::math::rng::device::bits<UIntType>> {
 protected:
     template <typename EngineType>
     auto generate(EngineType& engine) -> typename std::enable_if<
@@ -33,7 +33,7 @@ class distribution_base<oneapi::mkl::rng::device::bits<UIntType>> {
         typename std::conditional<EngineType::vec_size == 1, UIntType,
                                   sycl::vec<UIntType, EngineType::vec_size>>::type>::type {
         static_assert(std::is_same<UIntType, uint32_t>::value,
-                      "oneMKL: bits works only with std::uint32_t");
+                      "oneMath: bits works only with std::uint32_t");
         return engine.generate();
     }
 
@@ -43,7 +43,7 @@ class distribution_base<oneapi::mkl::rng::device::bits<UIntType>> {
         typename std::conditional<EngineType::vec_size == 1, UIntType,
                                   sycl::vec<UIntType, EngineType::vec_size>>::type>::type {
         static_assert(std::is_same<UIntType, uint64_t>::value,
-                      "oneMKL: bits for mcg59 works only with std::uint64_t");
+                      "oneMath: bits for mcg59 works only with std::uint64_t");
         return engine.generate_bits();
     }
 
@@ -52,7 +52,7 @@ class distribution_base<oneapi::mkl::rng::device::bits<UIntType>> {
                             UIntType>::type
     generate_single(EngineType& engine) {
         static_assert(std::is_same<UIntType, uint32_t>::value,
-                      "oneMKL: bits works only with std::uint32_t");
+                      "oneMath: bits works only with std::uint32_t");
         return engine.generate_single();
     }
 
@@ -61,11 +61,11 @@ class distribution_base<oneapi::mkl::rng::device::bits<UIntType>> {
                             UIntType>::type
     generate_single(EngineType& engine) {
         static_assert(std::is_same<UIntType, uint64_t>::value,
-                      "oneMKL: bits for mcg59 works only with std::uint64_t");
+                      "oneMath: bits for mcg59 works only with std::uint64_t");
         return engine.generate_single();
     }
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_BITS_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_BITS_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/distribution_base.hpp b/include/oneapi/math/rng/device/detail/distribution_base.hpp
similarity index 68%
rename from include/oneapi/mkl/rng/device/detail/distribution_base.hpp
rename to include/oneapi/math/rng/device/detail/distribution_base.hpp
index 575ea27f7..4faf6cd49 100644
--- a/include/oneapi/mkl/rng/device/detail/distribution_base.hpp
+++ b/include/oneapi/math/rng/device/detail/distribution_base.hpp
@@ -17,15 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DISTRIBUTION_BASE_HPP_
-#define _MKL_RNG_DISTRIBUTION_BASE_HPP_
+#ifndef ONEMATH_RNG_DISTRIBUTION_BASE_HPP_
+#define ONEMATH_RNG_DISTRIBUTION_BASE_HPP_
 
 #include <sycl/sycl.hpp>
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/device/types.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/device/types.hpp"
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 namespace detail {
 
@@ -65,17 +65,17 @@ class poisson;
 template <typename IntType = std::uint32_t, typename Method = bernoulli_method::by_default>
 class bernoulli;
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#include "oneapi/mkl/rng/device/detail/uniform_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/gaussian_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/lognormal_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/bits_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/exponential_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/poisson_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/bernoulli_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/beta_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/gamma_impl.hpp"
+#include "oneapi/math/rng/device/detail/uniform_impl.hpp"
+#include "oneapi/math/rng/device/detail/gaussian_impl.hpp"
+#include "oneapi/math/rng/device/detail/lognormal_impl.hpp"
+#include "oneapi/math/rng/device/detail/bits_impl.hpp"
+#include "oneapi/math/rng/device/detail/uniform_bits_impl.hpp"
+#include "oneapi/math/rng/device/detail/exponential_impl.hpp"
+#include "oneapi/math/rng/device/detail/poisson_impl.hpp"
+#include "oneapi/math/rng/device/detail/bernoulli_impl.hpp"
+#include "oneapi/math/rng/device/detail/beta_impl.hpp"
+#include "oneapi/math/rng/device/detail/gamma_impl.hpp"
 
-#endif // _MKL_RNG_DISTRIBUTION_BASE_HPP_
+#endif // ONEMATH_RNG_DISTRIBUTION_BASE_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/engine_base.hpp b/include/oneapi/math/rng/device/detail/engine_base.hpp
similarity index 68%
rename from include/oneapi/mkl/rng/device/detail/engine_base.hpp
rename to include/oneapi/math/rng/device/detail/engine_base.hpp
index fc1aee16a..07efadb41 100644
--- a/include/oneapi/mkl/rng/device/detail/engine_base.hpp
+++ b/include/oneapi/math/rng/device/detail/engine_base.hpp
@@ -17,14 +17,14 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_ENGINE_BASE_HPP_
-#define _MKL_RNG_DEVICE_ENGINE_BASE_HPP_
+#ifndef ONEMATH_RNG_DEVICE_ENGINE_BASE_HPP_
+#define ONEMATH_RNG_DEVICE_ENGINE_BASE_HPP_
 
 #include <cstdint>
 
 #include <sycl/sycl.hpp>
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 // internal structure to specify state of engine
 template <typename EngineType>
@@ -33,11 +33,11 @@ struct engine_state {};
 template <typename EngineType>
 class engine_base {};
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#include "oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp"
-#include "oneapi/mkl/rng/device/detail/mcg59_impl.hpp"
+#include "oneapi/math/rng/device/detail/philox4x32x10_impl.hpp"
+#include "oneapi/math/rng/device/detail/mrg32k3a_impl.hpp"
+#include "oneapi/math/rng/device/detail/mcg31m1_impl.hpp"
+#include "oneapi/math/rng/device/detail/mcg59_impl.hpp"
 
-#endif // _MKL_RNG_DEVICE_ENGINE_BASE_HPP_
+#endif // ONEMATH_RNG_DEVICE_ENGINE_BASE_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp b/include/oneapi/math/rng/device/detail/exponential_impl.hpp
similarity index 79%
rename from include/oneapi/mkl/rng/device/detail/exponential_impl.hpp
rename to include/oneapi/math/rng/device/detail/exponential_impl.hpp
index 9419fc154..5c89bf824 100644
--- a/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/exponential_impl.hpp
@@ -17,15 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
-#define _MKL_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
 
 #include "vm_wrappers.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename RealType, typename Method>
-class distribution_base<oneapi::mkl::rng::device::exponential<RealType, Method>> {
+class distribution_base<oneapi::math::rng::device::exponential<RealType, Method>> {
 public:
     struct param_type {
         param_type(RealType a, RealType beta) : a_(a), beta_(beta) {}
@@ -36,7 +36,7 @@ class distribution_base<oneapi::mkl::rng::device::exponential<RealType, Method>>
     distribution_base(RealType a, RealType beta) : a_(a), beta_(beta) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (beta <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "exponential", "beta <= 0");
+            throw oneapi::math::invalid_argument("rng", "exponential", "beta <= 0");
         }
 #endif
     }
@@ -56,7 +56,7 @@ class distribution_base<oneapi::mkl::rng::device::exponential<RealType, Method>>
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.beta_ <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "exponential", "beta <= 0");
+            throw oneapi::math::invalid_argument("rng", "exponential", "beta <= 0");
         }
 #endif
         a_ = pt.a_;
@@ -123,18 +123,19 @@ class distribution_base<oneapi::mkl::rng::device::exponential<RealType, Method>>
     RealType beta_;
 
     friend class distribution_base<
-        oneapi::mkl::rng::device::poisson<std::int32_t, poisson_method::devroye>>;
+        oneapi::math::rng::device::poisson<std::int32_t, poisson_method::devroye>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::poisson<std::uint32_t, poisson_method::devroye>>;
-    friend class distribution_base<oneapi::mkl::rng::device::gamma<float, gamma_method::marsaglia>>;
+        oneapi::math::rng::device::poisson<std::uint32_t, poisson_method::devroye>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::gamma<double, gamma_method::marsaglia>>;
+        oneapi::math::rng::device::gamma<float, gamma_method::marsaglia>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::gamma<float, gamma_method::marsaglia_accurate>>;
+        oneapi::math::rng::device::gamma<double, gamma_method::marsaglia>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::gamma<double, gamma_method::marsaglia_accurate>>;
+        oneapi::math::rng::device::gamma<float, gamma_method::marsaglia_accurate>>;
+    friend class distribution_base<
+        oneapi::math::rng::device::gamma<double, gamma_method::marsaglia_accurate>>;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_EXPONENTIAL_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp b/include/oneapi/math/rng/device/detail/gamma_impl.hpp
similarity index 89%
rename from include/oneapi/mkl/rng/device/detail/gamma_impl.hpp
rename to include/oneapi/math/rng/device/detail/gamma_impl.hpp
index 11397a69d..816ce0ecb 100644
--- a/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/gamma_impl.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
-#define _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_GAMMA_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_GAMMA_IMPL_HPP_
 
 #include "vm_wrappers.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 enum class gamma_algorithm { Exponential = 0, Vaduva, EPD_Transform, Marsaglia };
 
@@ -54,7 +54,7 @@ inline DataType gamma_c06() {
 }
 
 template <typename RealType, typename Method>
-class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
+class distribution_base<oneapi::math::rng::device::gamma<RealType, Method>> {
 public:
     struct param_type {
         param_type(RealType alpha, RealType a, RealType beta) : alpha_(alpha), a_(a), beta_(beta) {}
@@ -71,10 +71,10 @@ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
         set_algorithm();
 #ifndef __SYCL_DEVICE_ONLY__
         if (alpha <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gamma", "alpha <= 0");
+            throw oneapi::math::invalid_argument("rng", "gamma", "alpha <= 0");
         }
         else if (beta <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gamma", "beta <= 0");
+            throw oneapi::math::invalid_argument("rng", "gamma", "beta <= 0");
         }
 #endif
     }
@@ -102,10 +102,10 @@ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.alpha_ <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gamma", "alpha <= 0");
+            throw oneapi::math::invalid_argument("rng", "gamma", "alpha <= 0");
         }
         else if (pt.beta_ <= RealType(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gamma", "beta <= 0");
+            throw oneapi::math::invalid_argument("rng", "gamma", "beta <= 0");
         }
 #endif
         alpha_ = pt.alpha_;
@@ -250,7 +250,8 @@ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
         typename std::conditional<EngineType::vec_size == 1, RealType,
                                   sycl::vec<RealType, EngineType::vec_size>>::type {
         if (algorithm_ == gamma_algorithm::Exponential) {
-            distribution_base<oneapi::mkl::rng::device::exponential<RealType>> distr_exp(a_, beta_);
+            distribution_base<oneapi::math::rng::device::exponential<RealType>> distr_exp(a_,
+                                                                                          beta_);
             return distr_exp.generate(engine);
         }
         sycl::vec<RealType, EngineType::vec_size> res{};
@@ -262,7 +263,8 @@ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
     template <typename EngineType>
     RealType generate_single(EngineType& engine) {
         if (algorithm_ == gamma_algorithm::Exponential) {
-            distribution_base<oneapi::mkl::rng::device::exponential<RealType>> distr_exp(a_, beta_);
+            distribution_base<oneapi::math::rng::device::exponential<RealType>> distr_exp(a_,
+                                                                                          beta_);
             RealType z = distr_exp.generate_single(engine);
             return z;
         }
@@ -282,6 +284,6 @@ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
     gamma_algorithm algorithm_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_GAMMA_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp b/include/oneapi/math/rng/device/detail/gaussian_impl.hpp
similarity index 90%
rename from include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp
rename to include/oneapi/math/rng/device/detail/gaussian_impl.hpp
index 4588aea97..a0de61e60 100644
--- a/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/gaussian_impl.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
-#define _MKL_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
 
 #include "vm_wrappers.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 // sqrt(2)
 template <typename RealType = float>
@@ -37,7 +37,7 @@ constexpr inline double sqrt2<double>() {
 
 template <typename RealType>
 class distribution_base<
-    oneapi::mkl::rng::device::gaussian<RealType, gaussian_method::box_muller2>> {
+    oneapi::math::rng::device::gaussian<RealType, gaussian_method::box_muller2>> {
 public:
     struct param_type {
         param_type(RealType mean, RealType stddev) : mean_(mean), stddev_(stddev) {}
@@ -49,7 +49,7 @@ class distribution_base<
         flag_ = false;
 #ifndef __SYCL_DEVICE_ONLY__
         if (stddev <= RealType(0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gaussian", "stddev <= 0");
+            throw oneapi::math::invalid_argument("rng", "gaussian", "stddev <= 0");
         }
 #endif
     }
@@ -69,7 +69,7 @@ class distribution_base<
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.stddev_ <= RealType(0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gaussian", "stddev <= 0");
+            throw oneapi::math::invalid_argument("rng", "gaussian", "stddev <= 0");
         }
 #endif
         mean_ = pt.mean_;
@@ -185,17 +185,17 @@ class distribution_base<
     RealType u2_;
 
     friend class distribution_base<
-        oneapi::mkl::rng::device::lognormal<RealType, lognormal_method::box_muller2>>;
+        oneapi::math::rng::device::lognormal<RealType, lognormal_method::box_muller2>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::poisson<std::int32_t, poisson_method::devroye>>;
+        oneapi::math::rng::device::poisson<std::int32_t, poisson_method::devroye>>;
     friend class distribution_base<
-        oneapi::mkl::rng::device::poisson<std::uint32_t, poisson_method::devroye>>;
+        oneapi::math::rng::device::poisson<std::uint32_t, poisson_method::devroye>>;
 };
 
 #if MKL_RNG_USE_BINARY_CODE
 
 template <typename RealType>
-class distribution_base<oneapi::mkl::rng::device::gaussian<RealType, gaussian_method::icdf>> {
+class distribution_base<oneapi::math::rng::device::gaussian<RealType, gaussian_method::icdf>> {
 public:
     struct param_type {
         param_type(RealType mean, RealType stddev) : mean_(mean), stddev_(stddev) {}
@@ -206,7 +206,7 @@ class distribution_base<oneapi::mkl::rng::device::gaussian<RealType, gaussian_me
     distribution_base(RealType mean, RealType stddev) : mean_(mean), stddev_(stddev) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (stddev <= RealType(0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gaussian", "stddev <= 0");
+            throw oneapi::math::invalid_argument("rng", "gaussian", "stddev <= 0");
         }
 #endif
     }
@@ -226,7 +226,7 @@ class distribution_base<oneapi::mkl::rng::device::gaussian<RealType, gaussian_me
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.stddev_ <= RealType(0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gaussian", "stddev <= 0");
+            throw oneapi::math::invalid_argument("rng", "gaussian", "stddev <= 0");
         }
 #endif
         mean_ = pt.mean_;
@@ -265,6 +265,6 @@ class distribution_base<oneapi::mkl::rng::device::gaussian<RealType, gaussian_me
 };
 #endif
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_GAUSSIAN_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp b/include/oneapi/math/rng/device/detail/lognormal_impl.hpp
similarity index 81%
rename from include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp
rename to include/oneapi/math/rng/device/detail/lognormal_impl.hpp
index 85e8b6d57..d52a91de2 100644
--- a/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/lognormal_impl.hpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
-#define _MKL_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename RealType, typename Method>
-class distribution_base<oneapi::mkl::rng::device::lognormal<RealType, Method>> {
+class distribution_base<oneapi::math::rng::device::lognormal<RealType, Method>> {
 public:
     struct param_type {
         param_type(RealType m, RealType s, RealType displ, RealType scale)
@@ -43,7 +43,7 @@ class distribution_base<oneapi::mkl::rng::device::lognormal<RealType, Method>> {
               scale_(scale) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (scale <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "lognormal", "scale <= 0");
+            throw oneapi::math::invalid_argument("rng", "lognormal", "scale <= 0");
         }
 #endif
     }
@@ -71,7 +71,7 @@ class distribution_base<oneapi::mkl::rng::device::lognormal<RealType, Method>> {
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.scale_ <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "lognormal", "scale <= 0");
+            throw oneapi::math::invalid_argument("rng", "lognormal", "scale <= 0");
         }
 #endif
         gaussian_.param({ pt.m_, pt.s_ });
@@ -94,12 +94,12 @@ class distribution_base<oneapi::mkl::rng::device::lognormal<RealType, Method>> {
         return sycl::exp(res) * scale_ + displ_;
     }
 
-    distribution_base<oneapi::mkl::rng::device::gaussian<RealType, gaussian_method::box_muller2>>
+    distribution_base<oneapi::math::rng::device::gaussian<RealType, gaussian_method::box_muller2>>
         gaussian_;
     RealType displ_;
     RealType scale_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_LOGNORMAL_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp b/include/oneapi/math/rng/device/detail/mcg31m1_impl.hpp
similarity index 91%
rename from include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp
rename to include/oneapi/math/rng/device/detail/mcg31m1_impl.hpp
index 72447bc5d..8542fec7c 100644
--- a/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/mcg31m1_impl.hpp
@@ -17,10 +17,10 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_MCG31M1_IMPL_HPP_
-#define _MKL_RNG_DEVICE_MCG31M1_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_MCG31M1_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_MCG31M1_IMPL_HPP_
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 template <std::int32_t VecSize = 1>
 class mcg31m1;
@@ -71,7 +71,7 @@ struct mcg31m1_param {
 };
 
 template <std::int32_t VecSize>
-struct engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>> {
+struct engine_state<oneapi::math::rng::device::mcg31m1<VecSize>> {
     std::uint32_t s;
 };
 
@@ -134,14 +134,14 @@ static inline std::uint64_t power(std::uint64_t a, std::uint64_t n) {
 }
 
 template <std::int32_t VecSize>
-static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>& state,
+static inline void skip_ahead(engine_state<oneapi::math::rng::device::mcg31m1<VecSize>>& state,
                               std::uint64_t num_to_skip) {
     std::uint64_t loc_A = power(static_cast<std::uint64_t>(mcg31m1_param::a), num_to_skip);
     state.s = custom_mod<std::uint32_t>(loc_A * static_cast<std::uint64_t>(state.s));
 }
 
 template <std::int32_t VecSize>
-static inline void init(engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>& state,
+static inline void init(engine_state<oneapi::math::rng::device::mcg31m1<VecSize>>& state,
                         std::uint32_t seed, std::uint64_t offset) {
     state.s = custom_mod<std::uint32_t>(seed);
     if (state.s == 0)
@@ -151,7 +151,7 @@ static inline void init(engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>
 
 template <std::int32_t VecSize>
 static inline sycl::vec<std::uint32_t, VecSize> generate(
-    engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mcg31m1<VecSize>>& state) {
     sycl::vec<std::uint64_t, VecSize> x(state.s);
     sycl::vec<std::uint32_t, VecSize> res;
 #ifndef __HIPSYCL__
@@ -167,7 +167,7 @@ static inline sycl::vec<std::uint32_t, VecSize> generate(
 
 template <std::int32_t VecSize>
 static inline std::uint32_t generate_single(
-    engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mcg31m1<VecSize>>& state) {
     std::uint32_t x = state.s;
     state.s = custom_mod<std::uint32_t>(mcg31m1_param::a * static_cast<std::uint64_t>(state.s));
     return x;
@@ -176,7 +176,7 @@ static inline std::uint32_t generate_single(
 } // namespace mcg31m1_impl
 
 template <std::int32_t VecSize>
-class engine_base<oneapi::mkl::rng::device::mcg31m1<VecSize>> {
+class engine_base<oneapi::math::rng::device::mcg31m1<VecSize>> {
 protected:
     engine_base(std::uint32_t seed, std::uint64_t offset = 0) {
         mcg31m1_impl::init(this->state_, seed, offset);
@@ -223,11 +223,11 @@ class engine_base<oneapi::mkl::rng::device::mcg31m1<VecSize>> {
         detail::mcg31m1_impl::skip_ahead(this->state_, num_to_skip);
     }
 
-    engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>> state_;
+    engine_state<oneapi::math::rng::device::mcg31m1<VecSize>> state_;
 };
 
 } // namespace detail
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_MCG31M1_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_MCG31M1_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp b/include/oneapi/math/rng/device/detail/mcg59_impl.hpp
similarity index 92%
rename from include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp
rename to include/oneapi/math/rng/device/detail/mcg59_impl.hpp
index a70bb323d..64f73fee4 100644
--- a/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/mcg59_impl.hpp
@@ -17,10 +17,10 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_MCG59_IMPL_HPP_
-#define _MKL_RNG_DEVICE_MCG59_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_MCG59_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_MCG59_IMPL_HPP_
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 template <std::int32_t VecSize = 1>
 class mcg59;
@@ -71,7 +71,7 @@ struct mcg59_param {
 };
 
 template <std::int32_t VecSize>
-struct engine_state<oneapi::mkl::rng::device::mcg59<VecSize>> {
+struct engine_state<oneapi::math::rng::device::mcg59<VecSize>> {
     std::uint64_t s;
 };
 
@@ -103,14 +103,14 @@ static inline std::uint64_t power(std::uint64_t a, std::uint64_t n) {
 }
 
 template <std::int32_t VecSize>
-static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>& state,
+static inline void skip_ahead(engine_state<oneapi::math::rng::device::mcg59<VecSize>>& state,
                               std::uint64_t num_to_skip) {
     std::uint64_t loc_A = power(mcg59_param::a, num_to_skip);
     state.s = custom_mod(loc_A * state.s);
 }
 
 template <std::int32_t VecSize>
-static inline void init(engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>& state,
+static inline void init(engine_state<oneapi::math::rng::device::mcg59<VecSize>>& state,
                         std::uint64_t seed, std::uint64_t offset) {
     state.s = seed & mcg59_param::m_64;
     if (state.s == 0)
@@ -121,7 +121,7 @@ static inline void init(engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>&
 
 template <std::int32_t VecSize>
 static inline sycl::vec<std::uint64_t, VecSize> generate(
-    engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mcg59<VecSize>>& state) {
     sycl::vec<std::uint64_t, VecSize> res(state.s);
 #ifndef __HIPSYCL__
     res = custom_mod(mcg59_vector_a<VecSize>::vector_a * res);
@@ -135,7 +135,7 @@ static inline sycl::vec<std::uint64_t, VecSize> generate(
 
 template <std::int32_t VecSize>
 static inline std::uint64_t generate_single(
-    engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mcg59<VecSize>>& state) {
     std::uint64_t x = state.s;
     state.s = custom_mod(mcg59_param::a * x);
     return x;
@@ -144,7 +144,7 @@ static inline std::uint64_t generate_single(
 } // namespace mcg59_impl
 
 template <std::int32_t VecSize>
-class engine_base<oneapi::mkl::rng::device::mcg59<VecSize>> {
+class engine_base<oneapi::math::rng::device::mcg59<VecSize>> {
 protected:
     engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
         mcg59_impl::init(this->state_, seed, offset);
@@ -266,10 +266,10 @@ class engine_base<oneapi::mkl::rng::device::mcg59<VecSize>> {
         detail::mcg59_impl::skip_ahead(this->state_, num_to_skip);
     }
 
-    engine_state<oneapi::mkl::rng::device::mcg59<VecSize>> state_;
+    engine_state<oneapi::math::rng::device::mcg59<VecSize>> state_;
 };
 
 } // namespace detail
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_MCG59_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_MCG59_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp b/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp
similarity index 91%
rename from include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp
rename to include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp
index 596e625ad..8b4d91967 100644
--- a/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp
@@ -22,12 +22,12 @@
 // (2011). Parallelisation Techniques for Random Number Generators.
 // GPU Computing Gems Emerald Edition. 10.1016/B978-0-12-384988-5.00016-4
 
-#ifndef _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
-#define _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_MRG32K3A_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_MRG32K3A_IMPL_HPP_
 
-#include "oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp"
+#include "oneapi/math/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp"
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 template <std::int32_t VecSize = 1>
 class mrg32k3a;
@@ -35,7 +35,7 @@ class mrg32k3a;
 namespace detail {
 
 template <std::int32_t VecSize>
-struct engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
+struct engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>> {
     std::uint32_t s[6];
 };
 
@@ -176,13 +176,13 @@ static inline void vec3_pow_mod(
 }
 
 template <std::int32_t VecSize>
-static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state,
+static inline void skip_ahead(engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>>& state,
                               std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
     if (n > 3) {
         n = 3;
 #ifndef __SYCL_DEVICE_ONLY__
-        throw oneapi::mkl::invalid_argument("rng", "mrg32k3a",
-                                            "period is 2 ^ 191, skip on more than 2^192");
+        throw oneapi::math::invalid_argument("rng", "mrg32k3a",
+                                             "period is 2 ^ 191, skip on more than 2^192");
 #endif
     }
     vec3_pow_mod<mrg32k3a_params::m1>(state.s, n, num_to_skip_ptr, skip_ahead_matrix[0]);
@@ -190,7 +190,8 @@ static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mrg32k3a<Ve
 }
 
 template <std::int32_t VecSize>
-static inline void validate_seed(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
+static inline void validate_seed(
+    engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>>& state) {
     int i;
     for (i = 0; i < 3; i++) {
         if (state.s[i] >= mrg32k3a_params::m1) {
@@ -212,7 +213,7 @@ static inline void validate_seed(engine_state<oneapi::mkl::rng::device::mrg32k3a
 }
 
 template <std::int32_t VecSize>
-static inline void init(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state,
+static inline void init(engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>>& state,
                         std::uint64_t n, const std::uint32_t* seed_ptr, std::uint64_t n_offset,
                         const std::uint64_t* offset_ptr) {
     std::uint64_t i;
@@ -231,7 +232,7 @@ static inline void init(engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>
 
 template <std::int32_t VecSize>
 static inline sycl::vec<std::uint32_t, VecSize> generate(
-    engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>>& state) {
     const std::int32_t num_elements = VecSize;
     sycl::vec<std::uint32_t, VecSize> res;
     std::int64_t x, y;
@@ -270,7 +271,7 @@ static inline sycl::vec<std::uint32_t, VecSize> generate(
 
 template <std::int32_t VecSize>
 static inline std::uint32_t generate_single(
-    engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>>& state) {
     std::uint32_t res;
     std::int64_t x, y;
     x = mrg32k3a_params::a12 * static_cast<std::int64_t>(state.s[1]) -
@@ -307,7 +308,7 @@ static inline std::uint32_t generate_single(
 } // namespace mrg32k3a_impl
 
 template <std::int32_t VecSize>
-class engine_base<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
+class engine_base<oneapi::math::rng::device::mrg32k3a<VecSize>> {
 protected:
     engine_base(std::uint32_t seed, std::uint64_t offset = 0) {
         mrg32k3a_impl::init(this->state_, 1, &seed, 1, &offset);
@@ -375,10 +376,10 @@ class engine_base<oneapi::mkl::rng::device::mrg32k3a<VecSize>> {
         detail::mrg32k3a_impl::skip_ahead(this->state_, num_to_skip.size(), num_to_skip.begin());
     }
 
-    engine_state<oneapi::mkl::rng::device::mrg32k3a<VecSize>> state_;
+    engine_state<oneapi::math::rng::device::mrg32k3a<VecSize>> state_;
 };
 
 } // namespace detail
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_MRG32K3A_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_MRG32K3A_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp b/include/oneapi/math/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp
similarity index 99%
rename from include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp
rename to include/oneapi/math/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp
index d1ea8c263..8a30d7727 100644
--- a/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp
+++ b/include/oneapi/math/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp
@@ -17,10 +17,10 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
-#define _MKL_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
+#ifndef ONEMATH_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
+#define ONEMATH_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 namespace mrg32k3a_impl {
 
 constexpr std::size_t quantity_of_3x3_matrices = 455; // number of 3x3 matrices for skipping
@@ -3663,6 +3663,6 @@ static const std::uint32_t skip_ahead_matrix[2][quantity_of_3x3_matrices][3][3]
 };
 
 } // namespace mrg32k3a_impl
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
+#endif // ONEMATH_RNG_DEVICE_MRG32K3A_SKIP_AHEAD_MATRIX_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp b/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp
similarity index 93%
rename from include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp
rename to include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp
index f061bb754..ca133d22f 100644
--- a/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
-#define _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
 
 #include <utility> // std::pair
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 template <std::int32_t VecSize = 1>
 class philox4x32x10;
@@ -30,7 +30,7 @@ class philox4x32x10;
 namespace detail {
 
 template <std::int32_t VecSize>
-struct engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
+struct engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>> {
     std::uint32_t key[2];
     std::uint32_t counter[4];
     std::uint32_t part;
@@ -119,8 +119,9 @@ static inline void round_10(std::uint32_t* cnt, std::uint32_t* k) {
 }
 
 template <std::int32_t VecSize>
-static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
-                              std::uint64_t num_to_skip) {
+static inline void skip_ahead(
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state,
+    std::uint64_t num_to_skip) {
     std::uint64_t num_to_skip_tmp = num_to_skip;
     std::uint64_t c_inc;
     std::uint32_t counter[4];
@@ -157,8 +158,9 @@ static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x
 }
 
 template <std::int32_t VecSize>
-static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
-                              std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
+static inline void skip_ahead(
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state, std::uint64_t n,
+    const std::uint64_t* num_to_skip_ptr) {
     constexpr std::uint64_t uint_max = 0xFFFFFFFFFFFFFFFF;
     std::uint64_t post_buffer, pre_buffer;
     std::int32_t num_elements = 0;
@@ -250,7 +252,7 @@ static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x
 }
 
 template <std::int32_t VecSize>
-static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
+static inline void init(engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state,
                         std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t offset) {
     state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
     state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
@@ -270,7 +272,7 @@ static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<Vec
 }
 
 template <std::int32_t VecSize>
-static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
+static inline void init(engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state,
                         std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t n_offset,
                         const std::uint64_t* offset_ptr) {
     state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
@@ -293,7 +295,7 @@ static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<Vec
 // for VecSize > 4
 template <std::int32_t VecSize>
 __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_full(
-    engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state) {
     const std::int32_t num_elements = VecSize;
     sycl::vec<std::uint32_t, VecSize> res;
 
@@ -347,7 +349,7 @@ __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> g
 // for VecSize <= 4
 template <std::int32_t VecSize>
 __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_small(
-    engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state) {
     const std::int32_t num_elements = VecSize;
     sycl::vec<std::uint32_t, VecSize> res;
 
@@ -383,7 +385,7 @@ __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> g
 
 template <int VecSize>
 __attribute__((always_inline)) static inline std::uint32_t generate_single(
-    engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>>& state) {
     std::uint32_t res;
 
     std::uint32_t counter[4];
@@ -413,7 +415,7 @@ __attribute__((always_inline)) static inline std::uint32_t generate_single(
 } // namespace philox4x32x10_impl
 
 template <std::int32_t VecSize>
-class engine_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
+class engine_base<oneapi::math::rng::device::philox4x32x10<VecSize>> {
 protected:
     engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
         philox4x32x10_impl::init(this->state_, 1, &seed, offset);
@@ -543,10 +545,10 @@ class engine_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
                                                num_to_skip.begin());
     }
 
-    engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> state_;
+    engine_state<oneapi::math::rng::device::philox4x32x10<VecSize>> state_;
 };
 
 } // namespace detail
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp b/include/oneapi/math/rng/device/detail/poisson_impl.hpp
similarity index 95%
rename from include/oneapi/mkl/rng/device/detail/poisson_impl.hpp
rename to include/oneapi/math/rng/device/detail/poisson_impl.hpp
index 9fa9b26ec..7c977eaee 100644
--- a/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/poisson_impl.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
-#define _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_POISSON_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_POISSON_IMPL_HPP_
 
 #include <limits>
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 // Implementation of Poisson distribution uses 3 methods depending on lambda parameter:
 //    - table-lookup method [1] for small lambdas (lambda < 60)
@@ -126,7 +126,7 @@ struct poisson_parameters {
 };
 
 template <typename IntType>
-class distribution_base<oneapi::mkl::rng::device::poisson<IntType, poisson_method::devroye>> {
+class distribution_base<oneapi::math::rng::device::poisson<IntType, poisson_method::devroye>> {
 public:
     struct param_type {
         param_type(double lambda) : lambda_(lambda) {}
@@ -136,7 +136,7 @@ class distribution_base<oneapi::mkl::rng::device::poisson<IntType, poisson_metho
     distribution_base(double lambda) : lambda_(lambda) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (lambda_ <= 0.0) {
-            throw oneapi::mkl::invalid_argument("rng", "poisson", "lambda <= 0");
+            throw oneapi::math::invalid_argument("rng", "poisson", "lambda <= 0");
         }
 #endif
         params_.set_lambda(lambda_);
@@ -153,7 +153,7 @@ class distribution_base<oneapi::mkl::rng::device::poisson<IntType, poisson_metho
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.lambda_ <= 0.0) {
-            throw oneapi::mkl::invalid_argument("rng", "poisson", "lambda <= 0");
+            throw oneapi::math::invalid_argument("rng", "poisson", "lambda <= 0");
         }
 #endif
         lambda_ = pt.lambda_;
@@ -344,12 +344,12 @@ class distribution_base<oneapi::mkl::rng::device::poisson<IntType, poisson_metho
         return res;
     }
 
-    distribution_base<oneapi::mkl::rng::device::gaussian<double>> gaussian_ = { 0.0, 1.0 };
-    distribution_base<oneapi::mkl::rng::device::exponential<double>> exponential_ = { 0.0, 1.0 };
+    distribution_base<oneapi::math::rng::device::gaussian<double>> gaussian_ = { 0.0, 1.0 };
+    distribution_base<oneapi::math::rng::device::exponential<double>> exponential_ = { 0.0, 1.0 };
     poisson_parameters params_;
     double lambda_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_POISSON_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp b/include/oneapi/math/rng/device/detail/uniform_bits_impl.hpp
similarity index 76%
rename from include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp
rename to include/oneapi/math/rng/device/detail/uniform_bits_impl.hpp
index cd3cd2eed..a86833459 100644
--- a/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/uniform_bits_impl.hpp
@@ -17,15 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
-#define _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
 
 #include "engine_base.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename UIntType>
-class distribution_base<oneapi::mkl::rng::device::uniform_bits<UIntType>> {
+class distribution_base<oneapi::math::rng::device::uniform_bits<UIntType>> {
 protected:
     template <typename EngineType>
     auto generate(EngineType& engine) ->
@@ -33,7 +33,7 @@ class distribution_base<oneapi::mkl::rng::device::uniform_bits<UIntType>> {
                                   sycl::vec<UIntType, EngineType::vec_size>>::type {
         static_assert(std::is_same<EngineType, philox4x32x10<EngineType::vec_size>>::value ||
                           std::is_same<EngineType, mcg59<EngineType::vec_size>>::value,
-                      "oneMKL: uniform_bits works only with philox4x32x10/mcg59 engines");
+                      "oneMath: uniform_bits works only with philox4x32x10/mcg59 engines");
         return engine.template generate_uniform_bits<UIntType>();
     }
 
@@ -41,11 +41,11 @@ class distribution_base<oneapi::mkl::rng::device::uniform_bits<UIntType>> {
     UIntType generate_single(EngineType& engine) {
         static_assert(std::is_same<EngineType, philox4x32x10<EngineType::vec_size>>::value ||
                           std::is_same<EngineType, mcg59<EngineType::vec_size>>::value,
-                      "oneMKL: uniform_bits works only with philox4x32x10/mcg59 engines");
+                      "oneMath: uniform_bits works only with philox4x32x10/mcg59 engines");
         return engine.template generate_single_uniform_bits<UIntType>();
     }
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp b/include/oneapi/math/rng/device/detail/uniform_impl.hpp
similarity index 96%
rename from include/oneapi/mkl/rng/device/detail/uniform_impl.hpp
rename to include/oneapi/math/rng/device/detail/uniform_impl.hpp
index ec50eb8fc..d5d1ecc78 100644
--- a/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp
+++ b/include/oneapi/math/rng/device/detail/uniform_impl.hpp
@@ -17,14 +17,14 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_
-#define _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_
+#ifndef ONEMATH_RNG_DEVICE_UNIFORM_IMPL_HPP_
+#define ONEMATH_RNG_DEVICE_UNIFORM_IMPL_HPP_
 
 #include <limits>
 #include <cmath>
 #include "engine_base.hpp"
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 static inline std::uint64_t umul_hi_64(const std::uint64_t a, const std::uint64_t b) {
     const std::uint64_t a_lo = a & 0xFFFFFFFFULL;
@@ -63,7 +63,7 @@ static inline void generate_leftover(std::uint64_t range, Generator generate, st
 }
 
 template <typename Type, typename Method>
-class distribution_base<oneapi::mkl::rng::device::uniform<Type, Method>> {
+class distribution_base<oneapi::math::rng::device::uniform<Type, Method>> {
 public:
     struct param_type {
         param_type(Type a, Type b) : a_(a), b_(b) {}
@@ -74,7 +74,7 @@ class distribution_base<oneapi::mkl::rng::device::uniform<Type, Method>> {
     distribution_base(Type a, Type b) : a_(a), b_(b) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (a >= b) {
-            throw oneapi::mkl::invalid_argument("rng", "uniform", "a >= b");
+            throw oneapi::math::invalid_argument("rng", "uniform", "a >= b");
         }
 #endif
     }
@@ -94,7 +94,7 @@ class distribution_base<oneapi::mkl::rng::device::uniform<Type, Method>> {
     void param(const param_type& pt) {
 #ifndef __SYCL_DEVICE_ONLY__
         if (pt.a_ >= pt.b_) {
-            throw oneapi::mkl::invalid_argument("rng", "uniform", "a >= b");
+            throw oneapi::math::invalid_argument("rng", "uniform", "a >= b");
         }
 #endif
         a_ = pt.a_;
@@ -318,6 +318,6 @@ class distribution_base<oneapi::mkl::rng::device::uniform<Type, Method>> {
     Type b_;
 };
 
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_
+#endif // ONEMATH_RNG_DEVICE_UNIFORM_IMPL_HPP_
diff --git a/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp b/include/oneapi/math/rng/device/detail/vm_wrappers.hpp
similarity index 89%
rename from include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp
rename to include/oneapi/math/rng/device/detail/vm_wrappers.hpp
index 850945a4c..ddb0035bf 100644
--- a/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp
+++ b/include/oneapi/math/rng/device/detail/vm_wrappers.hpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_VM_WRAPPERS_HPP_
-#define _MKL_RNG_DEVICE_VM_WRAPPERS_HPP_
+#ifndef ONEMATH_RNG_DEVICE_VM_WRAPPERS_HPP_
+#define ONEMATH_RNG_DEVICE_VM_WRAPPERS_HPP_
 
 #include <cmath>
 
-namespace oneapi::mkl::rng::device::detail {
+namespace oneapi::math::rng::device::detail {
 
 template <typename DataType>
 static inline DataType sqrt_wrapper(DataType a) {
@@ -70,6 +70,6 @@ template <typename DataType>
 static inline DataType exp_wrapper(DataType a) {
     return sycl::exp(a);
 }
-} // namespace oneapi::mkl::rng::device::detail
+} // namespace oneapi::math::rng::device::detail
 
-#endif // _MKL_RNG_DEVICE_VM_WRAPPERS_HPP_
+#endif // ONEMATH_RNG_DEVICE_VM_WRAPPERS_HPP_
diff --git a/include/oneapi/mkl/rng/device/distributions.hpp b/include/oneapi/math/rng/device/distributions.hpp
similarity index 88%
rename from include/oneapi/mkl/rng/device/distributions.hpp
rename to include/oneapi/math/rng/device/distributions.hpp
index 121e81aa3..6def09d45 100644
--- a/include/oneapi/mkl/rng/device/distributions.hpp
+++ b/include/oneapi/math/rng/device/distributions.hpp
@@ -17,19 +17,19 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_DISTRIBUTIONS_HPP_
-#define _MKL_RNG_DEVICE_DISTRIBUTIONS_HPP_
+#ifndef ONEMATH_RNG_DEVICE_DISTRIBUTIONS_HPP_
+#define ONEMATH_RNG_DEVICE_DISTRIBUTIONS_HPP_
 
 #include <limits>
 
-#include "oneapi/mkl/rng/device/detail/distribution_base.hpp"
-#include "oneapi/mkl/rng/device/functions.hpp"
+#include "oneapi/math/rng/device/detail/distribution_base.hpp"
+#include "oneapi/math/rng/device/functions.hpp"
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 // CONTINUOUS AND DISCRETE RANDOM NUMBER DISTRIBUTIONS
 
-// Class template oneapi::mkl::rng::device::uniform
+// Class template oneapi::math::rng::device::uniform
 //
 // Represents continuous and discrete uniform random number distribution
 //
@@ -40,8 +40,8 @@ namespace oneapi::mkl::rng::device {
 //      std::uint32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::uniform_method::standard
-//      oneapi::mkl::rng::device::uniform_method::accurate
+//      oneapi::math::rng::device::uniform_method::standard
+//      oneapi::math::rng::device::uniform_method::accurate
 //
 // Input arguments:
 //      a - left bound. 0.0 by default
@@ -58,14 +58,14 @@ class uniform : detail::distribution_base<uniform<Type, Method>> {
 public:
     static_assert(std::is_same<Method, uniform_method::standard>::value ||
                       std::is_same<Method, uniform_method::accurate>::value,
-                  "oneMKL: rng/uniform: method is incorrect");
+                  "oneMath: rng/uniform: method is incorrect");
 
     static_assert(std::is_same<Type, float>::value || std::is_same<Type, double>::value ||
                       std::is_same<Type, std::int32_t>::value ||
                       std::is_same<Type, std::uint32_t>::value ||
                       std::is_same<Type, std::int64_t>::value ||
                       std::is_same<Type, std::uint64_t>::value,
-                  "oneMKL: rng/uniform: type is not supported");
+                  "oneMath: rng/uniform: type is not supported");
 
     using method_type = Method;
     using result_type = Type;
@@ -112,7 +112,7 @@ class uniform : detail::distribution_base<uniform<Type, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::gaussian
+// Class template oneapi::math::rng::device::gaussian
 //
 // Represents continuous normal random number distribution
 //
@@ -121,8 +121,8 @@ class uniform : detail::distribution_base<uniform<Type, Method>> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::gaussian_method::box_muller2
-//      oneapi::mkl::rng::device::gaussian_method::icdf
+//      oneapi::math::rng::device::gaussian_method::box_muller2
+//      oneapi::math::rng::device::gaussian_method::icdf
 //
 // Input arguments:
 //      mean   - mean. 0 by default
@@ -136,12 +136,12 @@ class gaussian : detail::distribution_base<gaussian<RealType, Method>> {
                       || std::is_same<Method, gaussian_method::icdf>::value
 #endif
                   ,
-                  "oneMKL: rng/gaussian: method is incorrect");
+                  "oneMath: rng/gaussian: method is incorrect");
 #if !MKL_RNG_USE_BINARY_CODE
     static_assert(!std::is_same<Method, gaussian_method::icdf>::value, "icdf method not supported");
 #endif
     static_assert(std::is_same<RealType, float>::value || std::is_same<RealType, double>::value,
-                  "oneMKL: rng/gaussian: type is not supported");
+                  "oneMath: rng/gaussian: type is not supported");
 
     using method_type = Method;
     using result_type = RealType;
@@ -179,7 +179,7 @@ class gaussian : detail::distribution_base<gaussian<RealType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::lognormal
+// Class template oneapi::math::rng::device::lognormal
 //
 // Represents continuous lognormal random number distribution
 //
@@ -188,7 +188,7 @@ class gaussian : detail::distribution_base<gaussian<RealType, Method>> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::lognormal_method::box_muller2
+//      oneapi::math::rng::device::lognormal_method::box_muller2
 //
 // Input arguments:
 //      m     - mean of the subject normal distribution. 0.0 by default
@@ -200,10 +200,10 @@ template <typename RealType, typename Method>
 class lognormal : detail::distribution_base<lognormal<RealType, Method>> {
 public:
     static_assert(std::is_same<Method, lognormal_method::box_muller2>::value,
-                  "oneMKL: rng/lognormal: method is incorrect");
+                  "oneMath: rng/lognormal: method is incorrect");
 
     static_assert(std::is_same<RealType, float>::value || std::is_same<RealType, double>::value,
-                  "oneMKL: rng/lognormal: type is not supported");
+                  "oneMath: rng/lognormal: type is not supported");
 
     using method_type = Method;
     using result_type = RealType;
@@ -252,7 +252,7 @@ class lognormal : detail::distribution_base<lognormal<RealType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::beta
+// Class template oneapi::math::rng::device::beta
 //
 // Represents continuous beta random number distribution
 //
@@ -261,8 +261,8 @@ class lognormal : detail::distribution_base<lognormal<RealType, Method>> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::beta_method::cja
-//      oneapi::mkl::rng::device::beta_method::cja_accurate
+//      oneapi::math::rng::device::beta_method::cja
+//      oneapi::math::rng::device::beta_method::cja_accurate
 //
 // Input arguments:
 //      p - shape. 1.0 by default
@@ -275,10 +275,10 @@ class beta : detail::distribution_base<beta<RealType, Method>> {
 public:
     static_assert(std::is_same<Method, beta_method::cja>::value ||
                       std::is_same<Method, beta_method::cja_accurate>::value,
-                  "oneMKL: rng/beta: method is incorrect");
+                  "oneMath: rng/beta: method is incorrect");
 
     static_assert(std::is_same<RealType, float>::value || std::is_same<RealType, double>::value,
-                  "oneMKL: rng/beta: type is not supported");
+                  "oneMath: rng/beta: type is not supported");
 
     using method_type = Method;
     using result_type = RealType;
@@ -330,7 +330,7 @@ class beta : detail::distribution_base<beta<RealType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::gamma
+// Class template oneapi::math::rng::device::gamma
 //
 // Represents continuous gamma random number distribution
 //
@@ -339,8 +339,8 @@ class beta : detail::distribution_base<beta<RealType, Method>> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::gamma_method::marsaglia
-//      oneapi::mkl::rng::device::gamma_method::marsaglia_accurate
+//      oneapi::math::rng::device::gamma_method::marsaglia
+//      oneapi::math::rng::device::gamma_method::marsaglia_accurate
 //
 // Input arguments:
 //      alpha - shape. 1.0 by default
@@ -352,10 +352,10 @@ class gamma : detail::distribution_base<gamma<RealType, Method>> {
 public:
     static_assert(std::is_same<Method, gamma_method::marsaglia>::value ||
                       std::is_same<Method, gamma_method::marsaglia_accurate>::value,
-                  "oneMKL: rng/gamma: method is incorrect");
+                  "oneMath: rng/gamma: method is incorrect");
 
     static_assert(std::is_same<RealType, float>::value || std::is_same<RealType, double>::value,
-                  "oneMKL: rng/gamma: type is not supported");
+                  "oneMath: rng/gamma: type is not supported");
 
     using method_type = Method;
     using result_type = RealType;
@@ -403,7 +403,7 @@ class gamma : detail::distribution_base<gamma<RealType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::uniform_bits
+// Class template oneapi::math::rng::device::uniform_bits
 //
 // Represents discrete uniform bits random number distribution
 //
@@ -416,7 +416,7 @@ class uniform_bits : detail::distribution_base<uniform_bits<UIntType>> {
 public:
     static_assert(std::is_same<UIntType, std::uint32_t>::value ||
                       std::is_same<UIntType, std::uint64_t>::value,
-                  "oneMKL: rng/uniform_bits: type is not supported");
+                  "oneMath: rng/uniform_bits: type is not supported");
     using result_type = UIntType;
 
 private:
@@ -429,7 +429,7 @@ class uniform_bits : detail::distribution_base<uniform_bits<UIntType>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::bits
+// Class template oneapi::math::rng::device::bits
 //
 // Represents bits of underlying random number engine
 //
@@ -442,7 +442,7 @@ class bits : detail::distribution_base<bits<UIntType>> {
 public:
     static_assert(std::is_same<UIntType, std::uint32_t>::value ||
                       std::is_same<UIntType, std::uint64_t>::value,
-                  "oneMKL: rng/bits: type is not supported");
+                  "oneMath: rng/bits: type is not supported");
     using result_type = UIntType;
 
 private:
@@ -455,7 +455,7 @@ class bits : detail::distribution_base<bits<UIntType>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::exponential
+// Class template oneapi::math::rng::device::exponential
 //
 // Represents continuous exponential random number distribution
 //
@@ -464,8 +464,8 @@ class bits : detail::distribution_base<bits<UIntType>> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::exponential_method::icdf
-//      oneapi::mkl::rng::device::exponential_method::icdf_accurate
+//      oneapi::math::rng::device::exponential_method::icdf
+//      oneapi::math::rng::device::exponential_method::icdf_accurate
 //
 // Input arguments:
 //      displ - displacement. 0.0 by default
@@ -476,10 +476,10 @@ class exponential : detail::distribution_base<exponential<RealType, Method>> {
 public:
     static_assert(std::is_same<Method, exponential_method::icdf>::value ||
                       std::is_same<Method, exponential_method::icdf_accurate>::value,
-                  "oneMKL: rng/exponential: method is incorrect");
+                  "oneMath: rng/exponential: method is incorrect");
 
     static_assert(std::is_same<RealType, float>::value || std::is_same<RealType, double>::value,
-                  "oneMKL: rng/exponential: type is not supported");
+                  "oneMath: rng/exponential: type is not supported");
 
     using method_type = Method;
     using result_type = RealType;
@@ -520,7 +520,7 @@ class exponential : detail::distribution_base<exponential<RealType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::poisson
+// Class template oneapi::math::rng::device::poisson
 //
 // Represents discrete poisson random number distribution
 //
@@ -529,7 +529,7 @@ class exponential : detail::distribution_base<exponential<RealType, Method>> {
 //      std::uint32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::device::poisson_method::devroye
+//      oneapi::math::rng::device::poisson_method::devroye
 //
 // Input arguments:
 //      lambda - mean value. 1.0 by default
@@ -538,11 +538,11 @@ template <typename IntType, typename Method>
 class poisson : detail::distribution_base<poisson<IntType, Method>> {
 public:
     static_assert(std::is_same<Method, poisson_method::devroye>::value,
-                  "oneMKL: rng/poisson: method is incorrect");
+                  "oneMath: rng/poisson: method is incorrect");
 
     static_assert(std::is_same<IntType, std::int32_t>::value ||
                       std::is_same<IntType, std::uint32_t>::value,
-                  "oneMKL: rng/poisson: type is not supported");
+                  "oneMath: rng/poisson: type is not supported");
 
     using method_type = Method;
     using result_type = IntType;
@@ -574,7 +574,7 @@ class poisson : detail::distribution_base<poisson<IntType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-// Class template oneapi::mkl::rng::device::bernoulli
+// Class template oneapi::math::rng::device::bernoulli
 //
 // Represents discrete Bernoulli random number distribution
 //
@@ -583,7 +583,7 @@ class poisson : detail::distribution_base<poisson<IntType, Method>> {
 //      std::int32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::bernoulli_method::icdf;
+//      oneapi::math::rng::device::bernoulli_method::icdf
 //
 // Input arguments:
 //      p - success probablity of a trial. 0.5 by default
@@ -592,7 +592,7 @@ template <typename IntType, typename Method>
 class bernoulli : detail::distribution_base<bernoulli<IntType, Method>> {
 public:
     static_assert(std::is_same<Method, bernoulli_method::icdf>::value,
-                  "oneMKL: rng/bernoulli: method is incorrect");
+                  "oneMath: rng/bernoulli: method is incorrect");
 
     static_assert(std::is_same<IntType, std::int32_t>::value ||
                       std::is_same<IntType, std::uint32_t>::value ||
@@ -600,7 +600,7 @@ class bernoulli : detail::distribution_base<bernoulli<IntType, Method>> {
                       std::is_same<IntType, std::uint16_t>::value ||
                       std::is_same<IntType, std::int8_t>::value ||
                       std::is_same<IntType, std::uint8_t>::value,
-                  "oneMKL: rng/bernoulli: type is not supported");
+                  "oneMath: rng/bernoulli: type is not supported");
 
     using method_type = Method;
     using result_type = IntType;
@@ -632,6 +632,6 @@ class bernoulli : detail::distribution_base<bernoulli<IntType, Method>> {
     friend typename Distr::result_type generate_single(Distr& distr, Engine& engine);
 };
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_DISTRIBUTIONS_HPP_
+#endif // ONEMATH_RNG_DEVICE_DISTRIBUTIONS_HPP_
diff --git a/include/oneapi/mkl/rng/device/engines.hpp b/include/oneapi/math/rng/device/engines.hpp
similarity index 90%
rename from include/oneapi/mkl/rng/device/engines.hpp
rename to include/oneapi/math/rng/device/engines.hpp
index f1bcfd1b0..c5bddbe94 100644
--- a/include/oneapi/mkl/rng/device/engines.hpp
+++ b/include/oneapi/math/rng/device/engines.hpp
@@ -17,20 +17,20 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_ENGINES_HPP_
-#define _MKL_RNG_DEVICE_ENGINES_HPP_
+#ifndef ONEMATH_RNG_DEVICE_ENGINES_HPP_
+#define ONEMATH_RNG_DEVICE_ENGINES_HPP_
 
 #include <limits>
 
-#include "oneapi/mkl/rng/device/types.hpp"
-#include "oneapi/mkl/rng/device/functions.hpp"
-#include "oneapi/mkl/rng/device/detail/engine_base.hpp"
+#include "oneapi/math/rng/device/types.hpp"
+#include "oneapi/math/rng/device/functions.hpp"
+#include "oneapi/math/rng/device/detail/engine_base.hpp"
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 // PSEUDO-RANDOM NUMBER DEVICE-SIDE ENGINES
 
-// Class template oneapi::mkl::rng::device::philox4x32x10
+// Class template oneapi::math::rng::device::philox4x32x10
 //
 // Represents Philox4x32-10 counter-based pseudorandom number generator
 //
@@ -71,7 +71,7 @@ class philox4x32x10 : detail::engine_base<philox4x32x10<VecSize>> {
     friend class detail::distribution_base;
 };
 
-// Class oneapi::mkl::rng::device::mrg32k3a
+// Class oneapi::math::rng::device::mrg32k3a
 //
 // Represents the combined recurcive pseudorandom number generator
 //
@@ -111,7 +111,7 @@ class mrg32k3a : detail::engine_base<mrg32k3a<VecSize>> {
     friend class detail::distribution_base;
 };
 
-// Class oneapi::mkl::rng::device::mcg31m1
+// Class oneapi::math::rng::device::mcg31m1
 //
 //
 //
@@ -138,7 +138,7 @@ class mcg31m1 : detail::engine_base<mcg31m1<VecSize>> {
     friend class detail::distribution_base;
 };
 
-// Class oneapi::mkl::rng::device::mcg59
+// Class oneapi::math::rng::device::mcg59
 //
 //
 //
@@ -165,6 +165,6 @@ class mcg59 : detail::engine_base<mcg59<VecSize>> {
     friend class detail::distribution_base;
 };
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_ENGINES_HPP_
+#endif // ONEMATH_RNG_DEVICE_ENGINES_HPP_
diff --git a/include/oneapi/mkl/rng/device/functions.hpp b/include/oneapi/math/rng/device/functions.hpp
similarity index 84%
rename from include/oneapi/mkl/rng/device/functions.hpp
rename to include/oneapi/math/rng/device/functions.hpp
index d8542b836..ccd6d5cc8 100644
--- a/include/oneapi/mkl/rng/device/functions.hpp
+++ b/include/oneapi/math/rng/device/functions.hpp
@@ -17,14 +17,14 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_FUNCTIONS_HPP_
-#define _MKL_RNG_DEVICE_FUNCTIONS_HPP_
+#ifndef ONEMATH_RNG_DEVICE_FUNCTIONS_HPP_
+#define ONEMATH_RNG_DEVICE_FUNCTIONS_HPP_
 
 #include <sycl/sycl.hpp>
 
-#include "oneapi/mkl/rng/device/detail/distribution_base.hpp"
+#include "oneapi/math/rng/device/detail/distribution_base.hpp"
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 // GENERATE FUNCTIONS
 
@@ -47,6 +47,6 @@ void skip_ahead(Engine& engine, std::initializer_list<std::uint64_t> num_to_skip
     engine.skip_ahead(num_to_skip);
 }
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_FUNCTIONS_HPP_
+#endif // ONEMATH_RNG_DEVICE_FUNCTIONS_HPP_
diff --git a/include/oneapi/mkl/rng/device/types.hpp b/include/oneapi/math/rng/device/types.hpp
similarity index 90%
rename from include/oneapi/mkl/rng/device/types.hpp
rename to include/oneapi/math/rng/device/types.hpp
index 6f87917f8..d2cb7ac37 100644
--- a/include/oneapi/mkl/rng/device/types.hpp
+++ b/include/oneapi/math/rng/device/types.hpp
@@ -17,10 +17,10 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_DEVICE_TYPES_HPP_
-#define _MKL_RNG_DEVICE_TYPES_HPP_
+#ifndef ONEMATH_RNG_DEVICE_TYPES_HPP_
+#define ONEMATH_RNG_DEVICE_TYPES_HPP_
 
-namespace oneapi::mkl::rng::device {
+namespace oneapi::math::rng::device {
 
 // METHODS FOR DISTRIBUTIONS
 
@@ -69,6 +69,6 @@ struct marsaglia_accurate {};
 using by_default = marsaglia;
 } // namespace gamma_method
 
-} // namespace oneapi::mkl::rng::device
+} // namespace oneapi::math::rng::device
 
-#endif // _MKL_RNG_DEVICE_TYPES_HPP_
+#endif // ONEMATH_RNG_DEVICE_TYPES_HPP_
diff --git a/include/oneapi/mkl/rng/distributions.hpp b/include/oneapi/math/rng/distributions.hpp
similarity index 83%
rename from include/oneapi/mkl/rng/distributions.hpp
rename to include/oneapi/math/rng/distributions.hpp
index 88d1e46e7..f20e1333f 100644
--- a/include/oneapi/mkl/rng/distributions.hpp
+++ b/include/oneapi/math/rng/distributions.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_DISTRIBUTIONS_HPP_
-#define _ONEMKL_RNG_DISTRIBUTIONS_HPP_
+#ifndef _ONEMATH_RNG_DISTRIBUTIONS_HPP_
+#define _ONEMATH_RNG_DISTRIBUTIONS_HPP_
 
 #include <cstdint>
 #include <limits>
@@ -28,13 +28,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 
-// Class template oneapi::mkl::rng::uniform
+// Class template oneapi::math::rng::uniform
 //
 // Represents continuous and discrete uniform random number distribution
 //
@@ -44,8 +44,8 @@ namespace rng {
 //      std::int32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::uniform_method::standard
-//      oneapi::mkl::rng::uniform_method::accurate - for float and double types only
+//      oneapi::math::rng::uniform_method::standard
+//      oneapi::math::rng::uniform_method::accurate - for float and double types only
 //
 // Input arguments:
 //      a - left bound. 0.0 by default
@@ -75,8 +75,8 @@ class uniform {
 
     explicit uniform(Type a, Type b) : a_(a), b_(b) {
         if (a >= b) {
-            throw oneapi::mkl::invalid_argument("rng", "uniform",
-                                                "parameters are incorrect, a >= b");
+            throw oneapi::math::invalid_argument("rng", "uniform",
+                                                 "parameters are incorrect, a >= b");
         }
     }
 
@@ -103,8 +103,8 @@ class uniform<std::int32_t, Method> {
 
     explicit uniform(std::int32_t a, std::int32_t b) : a_(a), b_(b) {
         if (a >= b) {
-            throw oneapi::mkl::invalid_argument("rng", "uniform",
-                                                "parameters are incorrect, a >= b");
+            throw oneapi::math::invalid_argument("rng", "uniform",
+                                                 "parameters are incorrect, a >= b");
         }
     }
 
@@ -121,7 +121,7 @@ class uniform<std::int32_t, Method> {
     std::int32_t b_;
 };
 
-// Class template oneapi::mkl::rng::gaussian
+// Class template oneapi::math::rng::gaussian
 //
 // Represents continuous normal random number distribution
 //
@@ -130,8 +130,8 @@ class uniform<std::int32_t, Method> {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::gaussian_method::box_muller2
-//      oneapi::mkl::rng::gaussian_method::icdf
+//      oneapi::math::rng::gaussian_method::box_muller2
+//      oneapi::math::rng::gaussian_method::icdf
 //
 // Input arguments:
 //      mean   - mean. 0 by default
@@ -160,8 +160,8 @@ class gaussian {
 
     explicit gaussian(RealType mean, RealType stddev) : mean_(mean), stddev_(stddev) {
         if (stddev <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "gaussian",
-                                                "stddev parameter is incorrect, stddev <= 0.0");
+            throw oneapi::math::invalid_argument("rng", "gaussian",
+                                                 "stddev parameter is incorrect, stddev <= 0.0");
         }
     }
 
@@ -178,7 +178,7 @@ class gaussian {
     RealType stddev_;
 };
 
-// Class template oneapi::mkl::rng::lognormal
+// Class template oneapi::math::rng::lognormal
 //
 // Represents continuous lognormal random number distribution
 //
@@ -187,8 +187,8 @@ class gaussian {
 //      double
 //
 // Supported methods:
-//      oneapi::mkl::rng::lognormal_method::box_muller2
-//      oneapi::mkl::rng::lognormal_method::icdf
+//      oneapi::math::rng::lognormal_method::box_muller2
+//      oneapi::math::rng::lognormal_method::icdf
 //
 // Input arguments:
 //      m     - mean of the subject normal distribution. 0.0 by default
@@ -226,10 +226,10 @@ class lognormal {
               displ_(displ),
               scale_(scale) {
         if (s <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "lognormal", "s <= 0");
+            throw oneapi::math::invalid_argument("rng", "lognormal", "s <= 0");
         }
         if (scale <= static_cast<RealType>(0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "lognormal", "scale <= 0");
+            throw oneapi::math::invalid_argument("rng", "lognormal", "scale <= 0");
         }
     }
 
@@ -256,7 +256,7 @@ class lognormal {
     RealType scale_;
 };
 
-// Class template oneapi::mkl::rng::bernoulli
+// Class template oneapi::math::rng::bernoulli
 //
 // Represents discrete Bernoulli random number distribution
 //
@@ -265,7 +265,7 @@ class lognormal {
 //      std::int32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::bernoulli_method::icdf;
+//      oneapi::math::rng::bernoulli_method::icdf
 //
 // Input arguments:
 //      p - success probability of a trial. 0.5 by default
@@ -292,7 +292,7 @@ class bernoulli {
 
     explicit bernoulli(float p) : p_(p) {
         if ((p > 1.0f) || (p < 0.0f)) {
-            throw oneapi::mkl::invalid_argument("rng", "bernoulli", "p > 1 or p < 0");
+            throw oneapi::math::invalid_argument("rng", "bernoulli", "p > 1 or p < 0");
         }
     }
 
@@ -304,7 +304,7 @@ class bernoulli {
     float p_;
 };
 
-// Class template oneapi::mkl::rng::poisson
+// Class template oneapi::math::rng::poisson
 //
 // Represents discrete Poisson random number distribution
 //
@@ -312,7 +312,7 @@ class bernoulli {
 //      std::int32_t
 //
 // Supported methods:
-//      oneapi::mkl::rng::poisson_method::gaussian_icdf_based
+//      oneapi::math::rng::poisson_method::gaussian_icdf_based
 //
 // Input arguments:
 //      lambda - distribution parameter. 0.5 by default
@@ -339,7 +339,7 @@ class poisson {
 
     explicit poisson(double lambda) : lambda_(lambda) {
         if ((lambda <= 0.0)) {
-            throw oneapi::mkl::invalid_argument("rng", "poisson", "lamdba < 0");
+            throw oneapi::math::invalid_argument("rng", "poisson", "lamdba < 0");
         }
     }
 
@@ -351,7 +351,7 @@ class poisson {
     double lambda_;
 };
 
-// Class template oneapi::mkl::rng::bits
+// Class template oneapi::math::rng::bits
 //
 // Represents bits of underlying random number engine
 //
@@ -367,7 +367,7 @@ class bits {
 };
 
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_DISTRIBUTIONS_HPP_
+#endif //_ONEMATH_RNG_DISTRIBUTIONS_HPP_
diff --git a/include/oneapi/mkl/rng/engines.hpp b/include/oneapi/math/rng/engines.hpp
similarity index 87%
rename from include/oneapi/mkl/rng/engines.hpp
rename to include/oneapi/math/rng/engines.hpp
index 610fda70b..f09243459 100644
--- a/include/oneapi/mkl/rng/engines.hpp
+++ b/include/oneapi/math/rng/engines.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_ENGINES_HPP_
-#define _ONEMKL_RNG_ENGINES_HPP_
+#ifndef _ONEMATH_RNG_ENGINES_HPP_
+#define _ONEMATH_RNG_ENGINES_HPP_
 
 #include <cstdint>
 #include <limits>
@@ -29,30 +29,30 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/detail/rng_loader.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/detail/rng_loader.hpp"
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
-#include "oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp"
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
+#include "oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp"
 #endif
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
-#include "oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp"
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
+#include "oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp"
 #endif
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND
-#include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp"
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND
+#include "oneapi/math/rng/detail/curand/onemath_rng_curand.hpp"
 #endif
-#ifdef ONEMKL_ENABLE_ROCRAND_BACKEND
-#include "oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp"
+#ifdef ONEMATH_ENABLE_ROCRAND_BACKEND
+#include "oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp"
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 
-// Class oneapi::mkl::rng::philox4x32x10
+// Class oneapi::math::rng::philox4x32x10
 //
 // Represents Philox4x32-10 counter-based pseudorandom number generator
 //
@@ -68,7 +68,7 @@ class philox4x32x10 {
     philox4x32x10(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
             : pimpl_(detail::create_philox4x32x10(get_device_id(queue), queue, seed)) {}
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
     philox4x32x10(backend_selector<backend::mklcpu> selector, std::uint64_t seed = default_seed)
             : pimpl_(mklcpu::create_philox4x32x10(selector.get_queue(), seed)) {}
 
@@ -77,7 +77,7 @@ class philox4x32x10 {
             : pimpl_(mklcpu::create_philox4x32x10(selector.get_queue(), seed)) {}
 #endif
 
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
     philox4x32x10(backend_selector<backend::mklgpu> selector, std::uint64_t seed = default_seed)
             : pimpl_(mklgpu::create_philox4x32x10(selector.get_queue(), seed)) {}
 
@@ -86,7 +86,7 @@ class philox4x32x10 {
             : pimpl_(mklgpu::create_philox4x32x10(selector.get_queue(), seed)) {}
 #endif
 
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND
     philox4x32x10(backend_selector<backend::curand> selector, std::uint64_t seed = default_seed)
             : pimpl_(curand::create_philox4x32x10(selector.get_queue(), seed)) {}
 
@@ -94,7 +94,7 @@ class philox4x32x10 {
                   std::initializer_list<std::uint64_t> seed)
             : pimpl_(curand::create_philox4x32x10(selector.get_queue(), seed)) {}
 #endif
-#ifdef ONEMKL_ENABLE_ROCRAND_BACKEND
+#ifdef ONEMATH_ENABLE_ROCRAND_BACKEND
     philox4x32x10(backend_selector<backend::rocrand> selector, std::uint64_t seed = default_seed)
             : pimpl_(rocrand::create_philox4x32x10(selector.get_queue(), seed)) {}
 
@@ -144,7 +144,7 @@ class philox4x32x10 {
                                 const std::vector<sycl::event>& dependencies);
 };
 
-// Class oneapi::mkl::rng::mrg32k3a
+// Class oneapi::math::rng::mrg32k3a
 //
 // Represents the combined recurcive pseudorandom number generator
 //
@@ -160,7 +160,7 @@ class mrg32k3a {
     mrg32k3a(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
             : pimpl_(detail::create_mrg32k3a(get_device_id(queue), queue, seed)) {}
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
     mrg32k3a(backend_selector<backend::mklcpu> selector, std::uint32_t seed = default_seed)
             : pimpl_(mklcpu::create_mrg32k3a(selector.get_queue(), seed)) {}
 
@@ -168,7 +168,7 @@ class mrg32k3a {
             : pimpl_(mklcpu::create_mrg32k3a(selector.get_queue(), seed)) {}
 #endif
 
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
     mrg32k3a(backend_selector<backend::mklgpu> selector, std::uint32_t seed = default_seed)
             : pimpl_(mklgpu::create_mrg32k3a(selector.get_queue(), seed)) {}
 
@@ -176,7 +176,7 @@ class mrg32k3a {
             : pimpl_(mklgpu::create_mrg32k3a(selector.get_queue(), seed)) {}
 #endif
 
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND
     mrg32k3a(backend_selector<backend::curand> selector, std::uint32_t seed = default_seed)
             : pimpl_(curand::create_mrg32k3a(selector.get_queue(), seed)) {}
 
@@ -184,7 +184,7 @@ class mrg32k3a {
             : pimpl_(curand::create_mrg32k3a(selector.get_queue(), seed)) {}
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCRAND_BACKEND
+#ifdef ONEMATH_ENABLE_ROCRAND_BACKEND
     mrg32k3a(backend_selector<backend::rocrand> selector, std::uint32_t seed = default_seed)
             : pimpl_(rocrand::create_mrg32k3a(selector.get_queue(), seed)) {}
 
@@ -237,7 +237,7 @@ class mrg32k3a {
 using default_engine = philox4x32x10;
 
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_ENGINES_HPP_
+#endif //_ONEMATH_RNG_ENGINES_HPP_
diff --git a/include/oneapi/mkl/rng/functions.hpp b/include/oneapi/math/rng/functions.hpp
similarity index 88%
rename from include/oneapi/mkl/rng/functions.hpp
rename to include/oneapi/math/rng/functions.hpp
index 028e13557..72a370551 100644
--- a/include/oneapi/mkl/rng/functions.hpp
+++ b/include/oneapi/math/rng/functions.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_FUNCTIONS_HPP_
-#define _ONEMKL_RNG_FUNCTIONS_HPP_
+#ifndef _ONEMATH_RNG_FUNCTIONS_HPP_
+#define _ONEMATH_RNG_FUNCTIONS_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,14 +27,14 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/predicates.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/predicates.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 
-// Function oneapi::mkl::rng::generate().Buffer API
+// Function oneapi::math::rng::generate().Buffer API
 // Provides random numbers from a given engine with a given statistics
 //
 // Input parameters:
@@ -51,7 +51,7 @@ static inline void generate(const Distr& distr, Engine& engine, std::int64_t n,
     engine.pimpl_->generate(distr, n, r);
 }
 
-// Function oneapi::mkl::rng::generate(). USM API
+// Function oneapi::math::rng::generate(). USM API
 // Provides random numbers from a given engine with a given statistics
 //
 // Input parameters:
@@ -76,7 +76,7 @@ static inline sycl::event generate(const Distr& distr, Engine& engine, std::int6
 
 //  SERVICE FUNCTIONS
 
-// Function oneapi::mkl::rng::skip_ahead(). Common interface
+// Function oneapi::math::rng::skip_ahead(). Common interface
 //
 // Proceeds state of engine using the skip-ahead method
 //
@@ -88,7 +88,7 @@ static inline void skip_ahead(Engine& engine, std::uint64_t num_to_skip) {
     engine.pimpl_->skip_ahead(num_to_skip);
 }
 
-// Function oneapi::mkl::rng::skip_ahead(). Interface with partitioned number of skipped elements
+// Function oneapi::math::rng::skip_ahead(). Interface with partitioned number of skipped elements
 //
 // Proceeds state of engine using the skip-ahead method
 //
@@ -100,7 +100,7 @@ static inline void skip_ahead(Engine& engine, std::initializer_list<std::uint64_
     engine.pimpl_->skip_ahead(num_to_skip);
 }
 
-// Function oneapi::mkl::rng::leapfrog()
+// Function oneapi::math::rng::leapfrog()
 //
 // Proceeds state of engine using the leapfrog method
 //
@@ -114,7 +114,7 @@ static inline void leapfrog(Engine& engine, std::uint64_t idx, std::uint64_t str
 }
 
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_FUNCTIONS_HPP_
+#endif //_ONEMATH_RNG_FUNCTIONS_HPP_
diff --git a/include/oneapi/mkl/rng/predicates.hpp b/include/oneapi/math/rng/predicates.hpp
similarity index 76%
rename from include/oneapi/mkl/rng/predicates.hpp
rename to include/oneapi/math/rng/predicates.hpp
index 10422e543..07c4e4591 100644
--- a/include/oneapi/mkl/rng/predicates.hpp
+++ b/include/oneapi/math/rng/predicates.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_PREDICATES_HPP_
-#define _ONEMKL_RNG_PREDICATES_HPP_
+#ifndef _ONEMATH_RNG_PREDICATES_HPP_
+#define _ONEMATH_RNG_PREDICATES_HPP_
 
 #include <cstdint>
 #if __has_include(<sycl/sycl.hpp>)
@@ -27,11 +27,11 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 
 // Buffer APIs
@@ -39,9 +39,9 @@ namespace rng {
 template <typename Distr, typename Engine>
 inline void generate_precondition(const Distr& /*distr*/, Engine& /*engine*/, std::int64_t n,
                                   sycl::buffer<typename Distr::result_type, 1>& r) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     if (n < 0 || n > r.size()) {
-        throw oneapi::mkl::invalid_argument("rng", "generate", "n");
+        throw oneapi::math::invalid_argument("rng", "generate", "n");
     }
 #endif
 }
@@ -52,18 +52,18 @@ template <typename Distr, typename Engine>
 inline void generate_precondition(const Distr& /*distr*/, Engine& /*engine*/, std::int64_t n,
                                   typename Distr::result_type* r,
                                   const std::vector<sycl::event>& /*dependencies*/) {
-#ifndef ONEMKL_DISABLE_PREDICATES
+#ifndef ONEMATH_DISABLE_PREDICATES
     if (n < 0) {
-        throw oneapi::mkl::invalid_argument("rng", "generate", "n");
+        throw oneapi::math::invalid_argument("rng", "generate", "n");
     }
     if (r == nullptr) {
-        throw oneapi::mkl::invalid_argument("rng", "generate", "r is nullptr");
+        throw oneapi::math::invalid_argument("rng", "generate", "r is nullptr");
     }
 #endif
 }
 
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_RNG_PREDICATES_HPP_
+#endif //_ONEMATH_RNG_PREDICATES_HPP_
diff --git a/include/oneapi/math/sparse_blas.hpp b/include/oneapi/math/sparse_blas.hpp
new file mode 100644
index 000000000..ee9735374
--- /dev/null
+++ b/include/oneapi/math/sparse_blas.hpp
@@ -0,0 +1,43 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+#ifndef _ONEMATH_SPARSE_BLAS_HPP_
+#define _ONEMATH_SPARSE_BLAS_HPP_
+
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
+
+#include "oneapi/math/detail/config.hpp"
+
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
+#include "sparse_blas/detail/mklcpu/sparse_blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
+#include "sparse_blas/detail/mklgpu/sparse_blas_ct.hpp"
+#endif
+#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
+#include "sparse_blas/detail/cusparse/sparse_blas_ct.hpp"
+#endif
+
+#include "sparse_blas/detail/sparse_blas_rt.hpp"
+
+#endif // _ONEMATH_SPARSE_BLAS_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp b/include/oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp
similarity index 61%
rename from include/oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp
rename to include/oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp
index c8e816eeb..718553766 100644
--- a/include/oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp
+++ b/include/oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp
@@ -17,17 +17,17 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMKL_SPARSE_BLAS_CUSPARSE_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMKL_SPARSE_BLAS_CUSPARSE_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMATH_SPARSE_BLAS_CUSPARSE_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMATH_SPARSE_BLAS_CUSPARSE_HPP_
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-namespace oneapi::mkl::sparse::cusparse {
+namespace oneapi::math::sparse::cusparse {
 
-#include "oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx"
+#include "oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx"
 
-} // namespace oneapi::mkl::sparse::cusparse
+} // namespace oneapi::math::sparse::cusparse
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMKL_SPARSE_BLAS_CUSPARSE_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_ONEMATH_SPARSE_BLAS_CUSPARSE_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/cusparse/sparse_blas_ct.hpp b/include/oneapi/math/sparse_blas/detail/cusparse/sparse_blas_ct.hpp
similarity index 69%
rename from include/oneapi/mkl/sparse_blas/detail/cusparse/sparse_blas_ct.hpp
rename to include/oneapi/math/sparse_blas/detail/cusparse/sparse_blas_ct.hpp
index 11abb9a6f..ed714b542 100644
--- a/include/oneapi/mkl/sparse_blas/detail/cusparse/sparse_blas_ct.hpp
+++ b/include/oneapi/math/sparse_blas/detail/cusparse/sparse_blas_ct.hpp
@@ -17,24 +17,24 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
 
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
-#include "onemkl_sparse_blas_cusparse.hpp"
+#include "onemath_sparse_blas_cusparse.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 #define BACKEND cusparse
-#include "oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx"
+#include "oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx"
 #undef BACKEND
 
 } //namespace sparse
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_CUSPARSE_SPARSE_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/handles.hpp b/include/oneapi/math/sparse_blas/detail/handles.hpp
similarity index 84%
rename from include/oneapi/mkl/sparse_blas/detail/handles.hpp
rename to include/oneapi/math/sparse_blas/detail/handles.hpp
index 0566f93b4..15153339c 100644
--- a/include/oneapi/mkl/sparse_blas/detail/handles.hpp
+++ b/include/oneapi/math/sparse_blas/detail/handles.hpp
@@ -17,10 +17,10 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_HANDLES_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_HANDLES_HPP_
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Each backend can create its own handle type or re-use the native handle types that will be reinterpret_cast'ed to the types below
 
@@ -33,6 +33,6 @@ using dense_vector_handle_t = dense_vector_handle*;
 struct matrix_handle;
 using matrix_handle_t = matrix_handle*;
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_HANDLES_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/math/sparse_blas/detail/helper_types.hpp
similarity index 88%
rename from include/oneapi/mkl/sparse_blas/detail/helper_types.hpp
rename to include/oneapi/math/sparse_blas/detail/helper_types.hpp
index 75ee22211..49c462d04 100644
--- a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp
+++ b/include/oneapi/math/sparse_blas/detail/helper_types.hpp
@@ -17,15 +17,15 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
 
 #include <complex>
 #include <cstdint>
 #include <type_traits>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 namespace detail {
 
@@ -44,7 +44,7 @@ inline constexpr bool are_fp_int_supported_v =
 
 } // namespace detail
 } // namespace sparse
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp b/include/oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp
similarity index 60%
rename from include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp
rename to include/oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp
index 8686d35bc..59544a503 100644
--- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp
+++ b/include/oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp
@@ -17,19 +17,19 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_ONEMATH_SPARSE_BLAS_MKLCPU_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_ONEMATH_SPARSE_BLAS_MKLCPU_HPP_
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-namespace oneapi::mkl::sparse::mklcpu {
+namespace oneapi::math::sparse::mklcpu {
 
-namespace detail = oneapi::mkl::sparse::detail;
+namespace detail = oneapi::math::sparse::detail;
 
-#include "oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx"
+#include "oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx"
 
-} // namespace oneapi::mkl::sparse::mklcpu
+} // namespace oneapi::math::sparse::mklcpu
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_ONEMATH_SPARSE_BLAS_MKLCPU_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp b/include/oneapi/math/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp
similarity index 69%
rename from include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp
rename to include/oneapi/math/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp
index ee127c3f8..fc8077eff 100644
--- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp
+++ b/include/oneapi/math/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp
@@ -17,24 +17,24 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
 
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
-#include "onemkl_sparse_blas_mklcpu.hpp"
+#include "onemath_sparse_blas_mklcpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 #define BACKEND mklcpu
-#include "oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx"
+#include "oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx"
 #undef BACKEND
 
 } //namespace sparse
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp b/include/oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp
similarity index 60%
rename from include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp
rename to include/oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp
index eb3aaa5ff..651ee7731 100644
--- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp
+++ b/include/oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp
@@ -17,19 +17,19 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_ONEMKL_SPARSE_BLAS_MKLGPU_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_ONEMKL_SPARSE_BLAS_MKLGPU_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_ONEMATH_SPARSE_BLAS_MKLGPU_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_ONEMATH_SPARSE_BLAS_MKLGPU_HPP_
 
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/detail/export.hpp"
+#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-namespace oneapi::mkl::sparse::mklgpu {
+namespace oneapi::math::sparse::mklgpu {
 
-namespace detail = oneapi::mkl::sparse::detail;
+namespace detail = oneapi::math::sparse::detail;
 
-#include "oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx"
+#include "oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx"
 
-} // namespace oneapi::mkl::sparse::mklgpu
+} // namespace oneapi::math::sparse::mklgpu
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_ONEMKL_SPARSE_BLAS_MKLGPU_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_ONEMATH_SPARSE_BLAS_MKLGPU_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp b/include/oneapi/math/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp
similarity index 69%
rename from include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp
rename to include/oneapi/math/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp
index d3b0d365f..7e49e6305 100644
--- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp
+++ b/include/oneapi/math/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp
@@ -17,24 +17,24 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
 
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
-#include "onemkl_sparse_blas_mklgpu.hpp"
+#include "onemath_sparse_blas_mklgpu.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 #define BACKEND mklgpu
-#include "oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx"
+#include "oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx"
 #undef BACKEND
 
 } //namespace sparse
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_
diff --git a/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx b/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx
new file mode 100644
index 000000000..71e22a664
--- /dev/null
+++ b/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx
@@ -0,0 +1,218 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+// This file is meant to be included in each backend's onemath_sparse_blas_BACKEND.hpp file.
+// It is used to export each symbol from the onemath_sparse_blas_BACKEND library.
+
+// Dense vector
+template <typename dataType>
+ONEMATH_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle,
+                                      std::int64_t size, sycl::buffer<dataType, 1> val);
+template <typename dataType>
+ONEMATH_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle,
+                                      std::int64_t size, dataType* val);
+
+template <typename dataType>
+ONEMATH_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle,
+                                          std::int64_t size, sycl::buffer<dataType, 1> val);
+template <typename dataType>
+ONEMATH_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle,
+                                          std::int64_t size, dataType* val);
+
+ONEMATH_EXPORT sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle,
+                                                const std::vector<sycl::event>& dependencies = {});
+
+// Dense matrix
+template <typename dataType>
+ONEMATH_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle,
+                                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
+                                      layout dense_layout, sycl::buffer<dataType, 1> val);
+template <typename dataType>
+ONEMATH_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle,
+                                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
+                                      layout dense_layout, dataType* val);
+
+template <typename dataType>
+ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
+                                          std::int64_t num_rows, std::int64_t num_cols,
+                                          std::int64_t ld, layout dense_layout,
+                                          sycl::buffer<dataType, 1> val);
+template <typename dataType>
+ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
+                                          std::int64_t num_rows, std::int64_t num_cols,
+                                          std::int64_t ld, layout dense_layout, dataType* val);
+
+ONEMATH_EXPORT sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle,
+                                                const std::vector<sycl::event>& dependencies = {});
+
+// COO matrix
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
+                                    std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
+                                    index_base index, sycl::buffer<indexType, 1> row_ind,
+                                    sycl::buffer<indexType, 1> col_ind,
+                                    sycl::buffer<dataType, 1> val);
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
+                                    std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
+                                    index_base index, indexType* row_ind, indexType* col_ind,
+                                    dataType* val);
+
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
+                                        std::int64_t num_rows, std::int64_t num_cols,
+                                        std::int64_t nnz, index_base index,
+                                        sycl::buffer<indexType, 1> row_ind,
+                                        sycl::buffer<indexType, 1> col_ind,
+                                        sycl::buffer<dataType, 1> val);
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
+                                        std::int64_t num_rows, std::int64_t num_cols,
+                                        std::int64_t nnz, index_base index, indexType* row_ind,
+                                        indexType* col_ind, dataType* val);
+
+// CSR matrix
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
+                                    std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
+                                    index_base index, sycl::buffer<indexType, 1> row_ptr,
+                                    sycl::buffer<indexType, 1> col_ind,
+                                    sycl::buffer<dataType, 1> val);
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
+                                    std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
+                                    index_base index, indexType* row_ptr, indexType* col_ind,
+                                    dataType* val);
+
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
+                                        std::int64_t num_rows, std::int64_t num_cols,
+                                        std::int64_t nnz, index_base index,
+                                        sycl::buffer<indexType, 1> row_ptr,
+                                        sycl::buffer<indexType, 1> col_ind,
+                                        sycl::buffer<dataType, 1> val);
+template <typename dataType, typename indexType>
+ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
+                                        std::int64_t num_rows, std::int64_t num_cols,
+                                        std::int64_t nnz, index_base index, indexType* row_ptr,
+                                        indexType* col_ind, dataType* val);
+
+// Common sparse matrix functions
+ONEMATH_EXPORT sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle,
+                                                 const std::vector<sycl::event>& dependencies = {});
+ONEMATH_EXPORT bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle,
+                                        matrix_property property);
+
+// SPMM
+ONEMATH_EXPORT void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr);
+
+ONEMATH_EXPORT sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
+                                              const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA,
+                                     oneapi::math::transpose opB, const void* alpha,
+                                     matrix_view A_view, matrix_handle_t A_handle,
+                                     dense_matrix_handle_t B_handle, const void* beta,
+                                     dense_matrix_handle_t C_handle, spmm_alg alg,
+                                     spmm_descr_t spmm_descr, std::size_t& temp_buffer_size);
+
+ONEMATH_EXPORT void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                  oneapi::math::transpose opB, const void* alpha,
+                                  matrix_view A_view, matrix_handle_t A_handle,
+                                  dense_matrix_handle_t B_handle, const void* beta,
+                                  dense_matrix_handle_t C_handle, spmm_alg alg,
+                                  spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace);
+
+ONEMATH_EXPORT sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                         oneapi::math::transpose opB, const void* alpha,
+                                         matrix_view A_view, matrix_handle_t A_handle,
+                                         dense_matrix_handle_t B_handle, const void* beta,
+                                         dense_matrix_handle_t C_handle, spmm_alg alg,
+                                         spmm_descr_t spmm_descr, void* workspace,
+                                         const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA,
+                                oneapi::math::transpose opB, const void* alpha, matrix_view A_view,
+                                matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                                const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                                spmm_descr_t spmm_descr,
+                                const std::vector<sycl::event>& dependencies = {});
+
+// SPMV
+ONEMATH_EXPORT void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr);
+
+ONEMATH_EXPORT sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
+                                              const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA,
+                                     const void* alpha, matrix_view A_view,
+                                     matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                     const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
+                                     spmv_descr_t spmv_descr, std::size_t& temp_buffer_size);
+
+ONEMATH_EXPORT void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                  const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
+                                  dense_vector_handle_t x_handle, const void* beta,
+                                  dense_vector_handle_t y_handle, spmv_alg alg,
+                                  spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace);
+
+ONEMATH_EXPORT sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                         const void* alpha, matrix_view A_view,
+                                         matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                         const void* beta, dense_vector_handle_t y_handle,
+                                         spmv_alg alg, spmv_descr_t spmv_descr, void* workspace,
+                                         const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                                matrix_view A_view, matrix_handle_t A_handle,
+                                dense_vector_handle_t x_handle, const void* beta,
+                                dense_vector_handle_t y_handle, spmv_alg alg,
+                                spmv_descr_t spmv_descr,
+                                const std::vector<sycl::event>& dependencies = {});
+
+// SPSV
+ONEMATH_EXPORT void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr);
+
+ONEMATH_EXPORT sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
+                                              const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA,
+                                     const void* alpha, matrix_view A_view,
+                                     matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                     dense_vector_handle_t y_handle, spsv_alg alg,
+                                     spsv_descr_t spsv_descr, std::size_t& temp_buffer_size);
+
+ONEMATH_EXPORT void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                  const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
+                                  dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
+                                  spsv_alg alg, spsv_descr_t spsv_descr,
+                                  sycl::buffer<std::uint8_t, 1> workspace);
+
+ONEMATH_EXPORT sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                                         const void* alpha, matrix_view A_view,
+                                         matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                         dense_vector_handle_t y_handle, spsv_alg alg,
+                                         spsv_descr_t spsv_descr, void* workspace,
+                                         const std::vector<sycl::event>& dependencies = {});
+
+ONEMATH_EXPORT sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                                matrix_view A_view, matrix_handle_t A_handle,
+                                dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
+                                spsv_alg alg, spsv_descr_t spsv_descr,
+                                const std::vector<sycl::event>& dependencies = {});
diff --git a/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp b/include/oneapi/math/sparse_blas/detail/operation_types.hpp
similarity index 82%
rename from include/oneapi/mkl/sparse_blas/detail/operation_types.hpp
rename to include/oneapi/math/sparse_blas/detail/operation_types.hpp
index b79036830..495cfdc70 100644
--- a/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp
+++ b/include/oneapi/math/sparse_blas/detail/operation_types.hpp
@@ -17,10 +17,10 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Each backend can create its own descriptor type or re-use the native descriptor types that will be reinterpret_cast'ed to the types below
 
@@ -33,6 +33,6 @@ using spmv_descr_t = spmv_descr*;
 struct spsv_descr;
 using spsv_descr_t = spsv_descr*;
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx
similarity index 94%
rename from include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx
rename to include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx
index aacc32ce3..6d7b05d3d 100644
--- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx
+++ b/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx
@@ -18,7 +18,7 @@
 **************************************************************************/
 
 // This file is meant to be included in each backend sparse_blas_ct.hpp files
-// Each function calls the implementation from onemkl_sparse_blas_backends.hxx
+// Each function calls the implementation from onemath_sparse_blas_backends.hxx
 
 #ifndef BACKEND
 #error "BACKEND is not defined"
@@ -193,7 +193,7 @@ inline sycl::event release_spmm_descr(backend_selector<backend::BACKEND> selecto
 }
 
 inline void spmm_buffer_size(backend_selector<backend::BACKEND> selector,
-                             oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+                             oneapi::math::transpose opA, oneapi::math::transpose opB,
                              const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                              dense_matrix_handle_t B_handle, const void* beta,
                              dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
@@ -202,8 +202,8 @@ inline void spmm_buffer_size(backend_selector<backend::BACKEND> selector,
                               beta, C_handle, alg, spmm_descr, temp_buffer_size);
 }
 
-inline void spmm_optimize(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
+inline void spmm_optimize(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
+                          oneapi::math::transpose opB, const void* alpha, matrix_view A_view,
                           matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
                           const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
                           spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace) {
@@ -212,7 +212,7 @@ inline void spmm_optimize(backend_selector<backend::BACKEND> selector, oneapi::m
 }
 
 inline sycl::event spmm_optimize(backend_selector<backend::BACKEND> selector,
-                                 oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+                                 oneapi::math::transpose opA, oneapi::math::transpose opB,
                                  const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                                  dense_matrix_handle_t B_handle, const void* beta,
                                  dense_matrix_handle_t C_handle, spmm_alg alg,
@@ -222,8 +222,8 @@ inline sycl::event spmm_optimize(backend_selector<backend::BACKEND> selector,
                                   beta, C_handle, alg, spmm_descr, workspace, dependencies);
 }
 
-inline sycl::event spmm(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
-                        oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
+inline sycl::event spmm(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
+                        oneapi::math::transpose opB, const void* alpha, matrix_view A_view,
                         matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void* beta,
                         dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
                         const std::vector<sycl::event>& dependencies = {}) {
@@ -244,7 +244,7 @@ inline sycl::event release_spmv_descr(backend_selector<backend::BACKEND> selecto
 }
 
 inline void spmv_buffer_size(backend_selector<backend::BACKEND> selector,
-                             oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view,
+                             oneapi::math::transpose opA, const void* alpha, matrix_view A_view,
                              matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                              const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                              spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) {
@@ -252,7 +252,7 @@ inline void spmv_buffer_size(backend_selector<backend::BACKEND> selector,
                               y_handle, alg, spmv_descr, temp_buffer_size);
 }
 
-inline void spmv_optimize(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
+inline void spmv_optimize(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
                           const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, const void* beta,
                           dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
@@ -262,7 +262,7 @@ inline void spmv_optimize(backend_selector<backend::BACKEND> selector, oneapi::m
 }
 
 inline sycl::event spmv_optimize(backend_selector<backend::BACKEND> selector,
-                                 oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view,
+                                 oneapi::math::transpose opA, const void* alpha, matrix_view A_view,
                                  matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                                  const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                                  spmv_descr_t spmv_descr, void* workspace,
@@ -271,7 +271,7 @@ inline sycl::event spmv_optimize(backend_selector<backend::BACKEND> selector,
                                   beta, y_handle, alg, spmv_descr, workspace, dependencies);
 }
 
-inline sycl::event spmv(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
+inline sycl::event spmv(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
                         const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                         dense_vector_handle_t x_handle, const void* beta,
                         dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
@@ -293,7 +293,7 @@ inline sycl::event release_spsv_descr(backend_selector<backend::BACKEND> selecto
 }
 
 inline void spsv_buffer_size(backend_selector<backend::BACKEND> selector,
-                             oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view,
+                             oneapi::math::transpose opA, const void* alpha, matrix_view A_view,
                              matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                              dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                              std::size_t& temp_buffer_size) {
@@ -301,7 +301,7 @@ inline void spsv_buffer_size(backend_selector<backend::BACKEND> selector,
                               y_handle, alg, spsv_descr, temp_buffer_size);
 }
 
-inline void spsv_optimize(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
+inline void spsv_optimize(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
                           const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                           spsv_alg alg, spsv_descr_t spsv_descr,
@@ -311,7 +311,7 @@ inline void spsv_optimize(backend_selector<backend::BACKEND> selector, oneapi::m
 }
 
 inline sycl::event spsv_optimize(backend_selector<backend::BACKEND> selector,
-                                 oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view,
+                                 oneapi::math::transpose opA, const void* alpha, matrix_view A_view,
                                  matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                                  dense_vector_handle_t y_handle, spsv_alg alg,
                                  spsv_descr_t spsv_descr, void* workspace,
@@ -320,7 +320,7 @@ inline sycl::event spsv_optimize(backend_selector<backend::BACKEND> selector,
                                   y_handle, alg, spsv_descr, workspace, dependencies);
 }
 
-inline sycl::event spsv(backend_selector<backend::BACKEND> selector, oneapi::mkl::transpose opA,
+inline sycl::event spsv(backend_selector<backend::BACKEND> selector, oneapi::math::transpose opA,
                         const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                         dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                         spsv_alg alg, spsv_descr_t spsv_descr,
diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp
similarity index 87%
rename from include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp
rename to include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp
index e99613ba3..8066691e5 100644
--- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp
+++ b/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp
@@ -17,14 +17,14 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
-#define _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
+#define _ONEMATH_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
 
-#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 // Dense vector
@@ -123,25 +123,25 @@ void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr);
 sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
                                const std::vector<sycl::event>& dependencies = {});
 
-void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                       const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                       dense_matrix_handle_t B_handle, const void* beta,
                       dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
                       std::size_t& temp_buffer_size);
 
-void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                    const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                    dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
                    spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace);
 
-sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
+sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                          oneapi::math::transpose opB, const void* alpha, matrix_view A_view,
                           matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
                           const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
                           spmm_descr_t spmm_descr, void* workspace,
                           const std::vector<sycl::event>& dependencies = {});
 
-sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                  const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                  dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
                  spmm_alg alg, spmm_descr_t spmm_descr,
@@ -153,23 +153,23 @@ void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr);
 sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
                                const std::vector<sycl::event>& dependencies = {});
 
-void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                       matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                       spmv_descr_t spmv_descr, std::size_t& temp_buffer_size);
 
-void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                    matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                    spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace);
 
-sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                           matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, const void* beta,
                           dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
                           void* workspace, const std::vector<sycl::event>& dependencies = {});
 
-sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                  matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                  spmv_descr_t spmv_descr, const std::vector<sycl::event>& dependencies = {});
@@ -180,29 +180,29 @@ void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr);
 sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
                                const std::vector<sycl::event>& dependencies = {});
 
-void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                       matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                       std::size_t& temp_buffer_size);
 
-void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                    matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                    sycl::buffer<std::uint8_t, 1> workspace);
 
-sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                           matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                           spsv_alg alg, spsv_descr_t spsv_descr, void* workspace,
                           const std::vector<sycl::event>& dependencies = {});
 
-sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                  matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                  const std::vector<sycl::event>& dependencies = {});
 
 } // namespace sparse
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/matrix_view.hpp b/include/oneapi/math/sparse_blas/matrix_view.hpp
similarity index 85%
rename from include/oneapi/mkl/sparse_blas/matrix_view.hpp
rename to include/oneapi/math/sparse_blas/matrix_view.hpp
index 08762c5d7..8f817b83f 100644
--- a/include/oneapi/mkl/sparse_blas/matrix_view.hpp
+++ b/include/oneapi/math/sparse_blas/matrix_view.hpp
@@ -17,13 +17,13 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_
-#define _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_MATRIX_VIEW_HPP_
+#define _ONEMATH_SPARSE_BLAS_MATRIX_VIEW_HPP_
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 enum class matrix_descr {
@@ -45,7 +45,7 @@ struct matrix_view {
 };
 
 } // namespace sparse
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_MATRIX_VIEW_HPP_
diff --git a/include/oneapi/mkl/sparse_blas/types.hpp b/include/oneapi/math/sparse_blas/types.hpp
similarity index 86%
rename from include/oneapi/mkl/sparse_blas/types.hpp
rename to include/oneapi/math/sparse_blas/types.hpp
index 1a50d6ef4..14e34c6fc 100644
--- a/include/oneapi/mkl/sparse_blas/types.hpp
+++ b/include/oneapi/math/sparse_blas/types.hpp
@@ -17,20 +17,20 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_TYPES_HPP_
-#define _ONEMKL_SPARSE_BLAS_TYPES_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_TYPES_HPP_
+#define _ONEMATH_SPARSE_BLAS_TYPES_HPP_
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "matrix_view.hpp"
 #include "detail/handles.hpp"
 #include "detail/operation_types.hpp"
 
 /**
- * @file Include and define the sparse types that are common between Intel(R) oneMKL API and oneMKL interfaces API.
+ * @file Include and define the sparse types that are common between Intel(R) oneMKL API and oneMath API.
 */
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace sparse {
 
 enum class matrix_property {
@@ -67,7 +67,7 @@ enum class spsv_alg {
 };
 
 } // namespace sparse
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_SPARSE_BLAS_TYPES_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_TYPES_HPP_
diff --git a/include/oneapi/mkl/types.hpp b/include/oneapi/math/types.hpp
similarity index 94%
rename from include/oneapi/mkl/types.hpp
rename to include/oneapi/math/types.hpp
index 32d336e11..b95a1be0b 100644
--- a/include/oneapi/mkl/types.hpp
+++ b/include/oneapi/math/types.hpp
@@ -17,11 +17,11 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_TYPES_HPP_
-#define _ONEMKL_TYPES_HPP_
+#ifndef _ONEMATH_TYPES_HPP_
+#define _ONEMATH_TYPES_HPP_
 
 #ifdef __HIPSYCL__
-#include "oneapi/mkl/bfloat16.hpp"
+#include "oneapi/math/bfloat16.hpp"
 #endif
 
 #if __has_include(<sycl/sycl.hpp>)
@@ -31,7 +31,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 #ifndef __HIPSYCL__
 using bfloat16 = sycl::ext::oneapi::bfloat16;
@@ -116,7 +116,7 @@ enum class order : char {
     E = 1,
 };
 
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
 
-#endif //_ONEMKL_TYPES_HPP_
+#endif //_ONEMATH_TYPES_HPP_
diff --git a/include/oneapi/mkl.hpp b/include/oneapi/mkl.hpp
index f3e9b8618..2eca1b82c 100644
--- a/include/oneapi/mkl.hpp
+++ b/include/oneapi/mkl.hpp
@@ -17,15 +17,20 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_HPP_
-#define _ONEMKL_HPP_
+#ifndef ONEMATH_MKL_HPP
+#define ONEMATH_MKL_HPP
 
-#include "oneapi/mkl/types.hpp"
+// This deprecated header is planned to be removed in late 2025.
+#pragma message("Header `oneapi/mkl.hpp` is deprecated, please use `oneapi/math.hpp` instead")
 
-#include "oneapi/mkl/blas.hpp"
-#include "oneapi/mkl/dft.hpp"
-#include "oneapi/mkl/lapack.hpp"
-#include "oneapi/mkl/rng.hpp"
-#include "oneapi/mkl/sparse_blas.hpp"
+#include "oneapi/math/types.hpp"
 
-#endif //_ONEMKL_HPP_
+#include "oneapi/math/blas.hpp"
+#include "oneapi/math/dft.hpp"
+#include "oneapi/math/lapack.hpp"
+#include "oneapi/math/rng.hpp"
+#include "oneapi/math/sparse_blas.hpp"
+
+#include "namespace_alias.hpp"
+
+#endif // ONEMATH_MKL_HPP
diff --git a/include/oneapi/mkl/blas.hpp b/include/oneapi/mkl/blas.hpp
index 6b8066e69..71b2fed1e 100644
--- a/include/oneapi/mkl/blas.hpp
+++ b/include/oneapi/mkl/blas.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
+* Copyright 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,57 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_BLAS_HPP_
-#define _ONEMKL_BLAS_HPP_
+#ifndef ONEMATH_MKL_BLAS_HPP
+#define ONEMATH_MKL_BLAS_HPP
 
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
-#include <complex>
-#include <cstdint>
+// This deprecated header is planned to be removed in late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/blas.hpp` is deprecated, please use `oneapi/math/blas.hpp` instead")
 
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/blas.hpp"
 
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "namespace_alias.hpp"
 
-#include "oneapi/mkl/blas/detail/blas_loader.hpp"
-#ifdef ONEMKL_ENABLE_CUBLAS_BACKEND
-#include "oneapi/mkl/blas/detail/cublas/blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_ROCBLAS_BACKEND
-#include "oneapi/mkl/blas/detail/rocblas/blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
-#include "oneapi/mkl/blas/detail/mklcpu/blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
-#include "oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_NETLIB_BACKEND
-#include "oneapi/mkl/blas/detail/netlib/blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND
-#include "oneapi/mkl/blas/detail/portblas/blas_ct.hpp"
-#endif
-
-namespace oneapi {
-namespace mkl {
-namespace blas {
-namespace column_major {
-
-#include "blas.hxx"
-
-} //namespace column_major
-namespace row_major {
-
-#include "blas.hxx"
-
-} //namespace row_major
-} //namespace blas
-} //namespace mkl
-} //namespace oneapi
-
-#endif //_ONEMKL_BLAS_LOADER_HPP_
+#endif // ONEMATH_MKL_BLAS_HPP
diff --git a/include/oneapi/mkl/blas/detail/blas_loader.hxx b/include/oneapi/mkl/blas/detail/blas_loader.hxx
deleted file mode 100644
index 22ef22283..000000000
--- a/include/oneapi/mkl/blas/detail/blas_loader.hxx
+++ /dev/null
@@ -1,2556 +0,0 @@
-/*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-// Buffer APIs
-
-ONEMKL_EXPORT void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
-                        sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, double beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
-                        std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a,
-                        std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
-                        sycl::buffer<float, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a,
-                        sycl::buffer<double, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                       sycl::buffer<float, 1>& a);
-ONEMKL_EXPORT void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                       sycl::buffer<double, 1>& a);
-
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, double beta, sycl::buffer<double, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, sycl::half beta,
-                              sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, float beta,
-                              sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                              std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, float beta,
-                              sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                              std::int64_t batch_size);
-ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                              transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                              float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, float beta,
-                              sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-                        sycl::buffer<double, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                              sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                              float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                              sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                              double beta, sycl::buffer<double, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              transpose trans, std::int64_t n, std::int64_t k,
-                              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              transpose trans, std::int64_t n, std::int64_t k,
-                              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c,
-                       float s);
-ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c,
-                       double s);
-ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                       std::int64_t incy, float c, float s);
-ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                       std::int64_t incy, double c, double s);
-
-ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<float, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<double, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                         sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                         sycl::buffer<double, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                         std::int64_t incx, std::complex<float> beta,
-                         sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                         std::int64_t incx, std::complex<double> beta,
-                         sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                         std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                         std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-                         std::int64_t ldc);
-ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                         std::int64_t ldc);
-ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                         std::int64_t ldb, std::complex<double> beta,
-                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                        std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                        std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                              std::int64_t m, std::int64_t n, float alpha,
-                              sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
-                              sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                              std::int64_t m, std::int64_t n, double alpha,
-                              sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
-                              sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                              std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                              std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              std::int64_t m, std::int64_t n, sycl::buffer<float, 1>& a,
-                              std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, sycl::buffer<float, 1>& c,
-                              std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              std::int64_t m, std::int64_t n, sycl::buffer<double, 1>& a,
-                              std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, sycl::buffer<double, 1>& c,
-                              std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              std::int64_t m, std::int64_t n,
-                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                              std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              std::int64_t m, std::int64_t n,
-                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
-                              std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a,
-                       std::int64_t lda);
-ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, double alpha, sycl::buffer<std::complex<double>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a,
-                       std::int64_t lda);
-
-ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a);
-ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, double alpha, sycl::buffer<std::complex<double>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a);
-
-ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                             transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
-                             std::int64_t k, float alpha, sycl::buffer<int8_t, 1>& a,
-                             std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
-                             std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
-                             std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
-ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                             transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
-                             std::int64_t k, float alpha, sycl::buffer<int8_t, 1>& a,
-                             std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
-                             std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
-                             std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
-ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                             transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
-                             std::int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a,
-                             std::int64_t lda, uint8_t ao, sycl::buffer<int8_t, 1>& b,
-                             std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
-                             std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
-ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                             transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
-                             std::int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a,
-                             std::int64_t lda, uint8_t ao, sycl::buffer<uint8_t, 1>& b,
-                             std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
-                             std::int64_t ldc, sycl::buffer<int32_t, 1>& co);
-
-ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<float, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<double, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& d1,
-                         sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
-                         sycl::buffer<float, 1>& param);
-ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue& queue,
-                         sycl::buffer<double, 1>& d1, sycl::buffer<double, 1>& d2,
-                         sycl::buffer<double, 1>& x1, double y1, sycl::buffer<double, 1>& param);
-
-ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& result);
-ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& result);
-ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                         sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
-                         sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                         sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
-                         sycl::buffer<double, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
-                         std::int64_t ldb, std::complex<float> beta,
-                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                         std::int64_t ldb, std::complex<double> beta,
-                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
-                        sycl::buffer<double, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-                        sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, sycl::half beta,
-                        sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-                        sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                        float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
-                        sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
-                        std::int64_t lda);
-ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                       std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                       sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
-                       std::int64_t lda);
-ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                       std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                       sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a,
-                       std::int64_t lda);
-
-ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& result);
-ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& result);
-
-ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& a);
-ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& a);
-
-ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                        float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                        double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, std::complex<float> beta,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                        std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                        std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                        std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-                        std::int64_t ldc);
-ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<float>, 1>& result);
-ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& result);
-
-ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                       sycl::buffer<float, 1>& a, std::int64_t lda);
-ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                       std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                       sycl::buffer<double, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
-                        sycl::buffer<float, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a,
-                        sycl::buffer<double, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a,
-                        std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a,
-                        std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<float, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<double, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
-                        std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
-                        std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& result);
-ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& result);
-ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-                        std::int64_t incx);
-ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a);
-ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a);
-
-ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                              std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                              std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                              std::int64_t n, std::complex<float> alpha,
-                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                              uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                              std::int64_t n, std::complex<double> alpha,
-                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                        std::int64_t incy, sycl::buffer<float, 1>& param);
-ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                        std::int64_t incy, sycl::buffer<double, 1>& param);
-
-ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                       std::int64_t incy, sycl::buffer<float, 1>& result);
-ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                       std::int64_t incy, sycl::buffer<double, 1>& result);
-ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                       sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                       std::int64_t incy, sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb,
-                          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                          std::int64_t incy, sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
-                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                         transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                         std::int64_t ldb, double beta, sycl::buffer<std::complex<double>, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
-                        sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c,
-                        sycl::buffer<float, 1>& s);
-ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& a,
-                        sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c,
-                        sycl::buffer<double, 1>& s);
-ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
-                        sycl::buffer<std::complex<float>, 1>& s);
-ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
-                        sycl::buffer<std::complex<double>, 1>& s);
-
-ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, float alpha,
-                                  sycl::buffer<float, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<float, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, double alpha,
-                                  sycl::buffer<double, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<double, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, float alpha,
-                                  sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb,
-                                  std::int64_t stride, std::int64_t batch_size);
-ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, double alpha,
-                                  sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb,
-                                  std::int64_t stride, std::int64_t batch_size);
-ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                  sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
-                                  std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
-ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                  sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
-                                  std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                 transpose transb, std::int64_t m, std::int64_t n, float alpha,
-                                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                                 float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                                 std::int64_t stride_b, sycl::buffer<float, 1>& c, std::int64_t ldc,
-                                 std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                 transpose transb, std::int64_t m, std::int64_t n, double alpha,
-                                 sycl::buffer<double, 1>& a, std::int64_t lda,
-                                 std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
-                                 std::int64_t ldb, std::int64_t stride_b,
-                                 sycl::buffer<double, 1>& c, std::int64_t ldc,
-                                 std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                 transpose transb, std::int64_t m, std::int64_t n,
-                                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                                 std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
-                                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                                 std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
-                                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                 transpose transb, std::int64_t m, std::int64_t n,
-                                 std::complex<double> alpha,
-                                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                                 std::int64_t stride_a, std::complex<double> beta,
-                                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                                 std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
-                                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, double alpha,
-                            sycl::buffer<double, 1>& a, std::int64_t lda,
-                            sycl::buffer<double, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                             std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                             std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                             std::int64_t m, std::int64_t n, double alpha,
-                             sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
-                             sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t strideb);
-ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                             std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                             sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                             std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                             std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                             sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                             std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-
-ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
-                            std::int64_t lda, std::int64_t ldb);
-ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, double alpha,
-                            sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb);
-ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                            sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
-                            std::int64_t ldb);
-ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                            sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
-                            std::int64_t ldb);
-
-ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                           transpose transb, std::int64_t m, std::int64_t n, float alpha,
-                           sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-                           sycl::buffer<float, 1>& b, std::int64_t ldb, sycl::buffer<float, 1>& c,
-                           std::int64_t ldc);
-ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                           transpose transb, std::int64_t m, std::int64_t n, double alpha,
-                           sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-                           sycl::buffer<double, 1>& b, std::int64_t ldb, sycl::buffer<double, 1>& c,
-                           std::int64_t ldc);
-ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                           transpose transb, std::int64_t m, std::int64_t n,
-                           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                           std::int64_t lda, std::complex<float> beta,
-                           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                           transpose transb, std::int64_t m, std::int64_t n,
-                           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                           std::int64_t lda, std::complex<double> beta,
-                           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-// USM APIs
-
-ONEMKL_EXPORT sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                               const std::complex<float>* a, std::int64_t lda, float beta,
-                               std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                               const std::complex<double>* a, std::int64_t lda, double beta,
-                               std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               float alpha, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               double alpha, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<double> alpha, std::complex<double>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               float alpha, std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               double alpha, std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const float* a,
-                               std::int64_t lda, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const double* a,
-                               std::int64_t lda, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const float* a,
-                               float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const double* a,
-                               double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<float>* a, std::complex<float>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<double>* a, std::complex<double>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, float alpha, const float* x, std::int64_t incx,
-                              float* a, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, double alpha, const double* x, std::int64_t incx,
-                              double* a, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const float** a, std::int64_t* lda, const float** b,
-                                     std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, double* alpha,
-                                     const double** a, std::int64_t* lda, const double** b,
-                                     std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     const std::complex<float>** b, std::int64_t* ldb,
-                                     std::complex<float>* beta, std::complex<float>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     const std::complex<double>** b, std::int64_t* ldb,
-                                     std::complex<double>* beta, std::complex<double>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, sycl::half* alpha,
-                                     const sycl::half** a, std::int64_t* lda, const sycl::half** b,
-                                     std::int64_t* ldb, sycl::half* beta, sycl::half** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const sycl::half** a, std::int64_t* lda, const sycl::half** b,
-                                     std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const std::int8_t** a, std::int64_t* lda,
-                                     const std::int8_t** b, std::int64_t* ldb, float* beta,
-                                     float** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* transa, transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const std::int8_t** a, std::int64_t* lda,
-                                     const std::int8_t** b, std::int64_t* ldb, float* beta,
-                                     std::int32_t** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, float alpha, const float* a,
-                                     std::int64_t lda, std::int64_t stride_a, const float* b,
-                                     std::int64_t ldb, std::int64_t stride_b, float beta, float* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, double alpha, const double* a,
-                                     std::int64_t lda, std::int64_t stride_a, const double* b,
-                                     std::int64_t ldb, std::int64_t stride_b, double beta,
-                                     double* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-    const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-    const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
-    std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
-    const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-    std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
-    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, sycl::half alpha,
-                                     const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
-                                     const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
-                                     sycl::half beta, sycl::half* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, float alpha,
-                                     const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
-                                     const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
-                                     float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, float alpha,
-                                     const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
-                                     const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
-                                     float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose transa, transpose transb, std::int64_t m,
-                                     std::int64_t n, std::int64_t k, float alpha,
-                                     const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
-                                     const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
-                                     float beta, std::int32_t* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                               const float* a, std::int64_t lda, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                               const double* a, std::int64_t lda, double beta, double* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float> beta, std::complex<float>* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, std::int64_t n, std::int64_t k,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double> beta, std::complex<double>* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo* upper_lower, transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, float* alpha, const float** a,
-                                     std::int64_t* lda, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo* upper_lower, transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, double* alpha, const double** a,
-                                     std::int64_t* lda, double* beta, double** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo* upper_lower, transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, std::complex<float>* alpha,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     std::complex<float>* beta, std::complex<float>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo* upper_lower, transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, std::complex<double>* alpha,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     std::complex<double>* beta, std::complex<double>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo upper_lower, transpose trans, std::int64_t n,
-                                     std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                                     std::int64_t stride_a, float beta, float* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo upper_lower, transpose trans, std::int64_t n,
-                                     std::int64_t k, double alpha, const double* a,
-                                     std::int64_t lda, std::int64_t stride_a, double beta,
-                                     double* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo upper_lower, transpose trans, std::int64_t n,
-                                     std::int64_t k, std::complex<float> alpha,
-                                     const std::complex<float>* a, std::int64_t lda,
-                                     std::int64_t stride_a, std::complex<float> beta,
-                                     std::complex<float>* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     uplo upper_lower, transpose trans, std::int64_t n,
-                                     std::int64_t k, std::complex<double> alpha,
-                                     const std::complex<double>* a, std::int64_t lda,
-                                     std::int64_t stride_a, std::complex<double> beta,
-                                     std::complex<double>* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
-                              std::int64_t incy, float c, float s,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
-                              std::int64_t incy, double c, double s,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              float* x, std::int64_t incx, float* y, std::int64_t incy, float c,
-                              float s, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              double* x, std::int64_t incx, double* y, std::int64_t incy, double c,
-                              double s, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               float alpha, const float* x, std::int64_t incx, float* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               double alpha, const double* x, std::int64_t incx, double* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* x,
-                               std::int64_t incx, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* x,
-                               std::int64_t incx, std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, float* alpha, const float** x,
-                                     std::int64_t* incx, float** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, double* alpha, const double** x,
-                                     std::int64_t* incx, double** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, std::complex<float>* alpha,
-                                     const std::complex<float>** x, std::int64_t* incx,
-                                     std::complex<float>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, std::complex<double>* alpha,
-                                     const std::complex<double>** x, std::int64_t* incx,
-                                     std::complex<double>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     float alpha, const float* x, std::int64_t incx,
-                                     std::int64_t stridex, float* y, std::int64_t incy,
-                                     std::int64_t stridey, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     double alpha, const double* x, std::int64_t incx,
-                                     std::int64_t stridex, double* y, std::int64_t incy,
-                                     std::int64_t stridey, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     std::complex<float> alpha, const std::complex<float>* x,
-                                     std::int64_t incx, std::int64_t stridex,
-                                     std::complex<float>* y, std::int64_t incy,
-                                     std::int64_t stridey, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     std::complex<double> alpha, const std::complex<double>* x,
-                                     std::int64_t incx, std::int64_t stridex,
-                                     std::complex<double>* y, std::int64_t incy,
-                                     std::int64_t stridey, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                float alpha, const float* x, std::int64_t incx, const float beta,
-                                float* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                double alpha, const double* x, std::int64_t incx, const double beta,
-                                double* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                std::complex<float> alpha, const std::complex<float>* x,
-                                std::int64_t incx, const std::complex<float> beta,
-                                std::complex<float>* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                std::complex<double> alpha, const std::complex<double>* x,
-                                std::int64_t incx, const std::complex<double> beta,
-                                std::complex<double>* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                                const float* a, std::int64_t lda, const float* b, std::int64_t ldb,
-                                float beta, float* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                                const double* a, std::int64_t lda, const double* b,
-                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<float> alpha, const std::complex<float>* a,
-                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<double> alpha, const std::complex<double>* a,
-                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double> beta, std::complex<double>* c,
-                                std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, float alpha, const float* a,
-                               std::int64_t lda, const float* x, std::int64_t incx, float beta,
-                               float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, double alpha, const double* a,
-                               std::int64_t lda, const double* x, std::int64_t incx, double beta,
-                               double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose trans, std::int64_t m, std::int64_t n, float alpha,
-                                     const float* a, std::int64_t lda, std::int64_t stridea,
-                                     const float* x, std::int64_t incx, std::int64_t stridex,
-                                     float beta, float* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose trans, std::int64_t m, std::int64_t n, double alpha,
-                                     const double* a, std::int64_t lda, std::int64_t stridea,
-                                     const double* x, std::int64_t incx, std::int64_t stridex,
-                                     double beta, double* y, std::int64_t incy,
-                                     std::int64_t stridey, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-    std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
-    const std::complex<float>* x, std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
-    std::complex<float>* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-    std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
-    std::int64_t stridea, const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
-    std::complex<double> beta, std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
-    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* trans, std::int64_t* m, std::int64_t* n,
-                                     float* alpha, const float** a, std::int64_t* lda,
-                                     const float** x, std::int64_t* incx, float* beta, float** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* trans, std::int64_t* m, std::int64_t* n,
-                                     double* alpha, const double** a, std::int64_t* lda,
-                                     const double** x, std::int64_t* incx, double* beta, double** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* trans, std::int64_t* m, std::int64_t* n,
-                                     std::complex<float>* alpha, const std::complex<float>** a,
-                                     std::int64_t* lda, const std::complex<float>** x,
-                                     std::int64_t* incx, std::complex<float>* beta,
-                                     std::complex<float>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     transpose* trans, std::int64_t* m, std::int64_t* n,
-                                     std::complex<double>* alpha, const std::complex<double>** a,
-                                     std::int64_t* lda, const std::complex<double>** x,
-                                     std::int64_t* incx, std::complex<double>* beta,
-                                     std::complex<double>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, std::int64_t m, std::int64_t n,
-                                     const float* a, std::int64_t lda, std::int64_t stridea,
-                                     const float* x, std::int64_t incx, std::int64_t stridex,
-                                     float* c, std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, std::int64_t m, std::int64_t n,
-                                     const double* a, std::int64_t lda, std::int64_t stridea,
-                                     const double* x, std::int64_t incx, std::int64_t stridex,
-                                     double* c, std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, std::int64_t m, std::int64_t n,
-                                     const std::complex<float>* a, std::int64_t lda,
-                                     std::int64_t stridea, const std::complex<float>* x,
-                                     std::int64_t incx, std::int64_t stridex,
-                                     std::complex<float>* c, std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, std::int64_t m, std::int64_t n,
-                                     const std::complex<double>* a, std::int64_t lda,
-                                     std::int64_t stridea, const std::complex<double>* x,
-                                     std::int64_t incx, std::int64_t stridex,
-                                     std::complex<double>* c, std::int64_t ldc,
-                                     std::int64_t stridec, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, std::int64_t* m, std::int64_t* n,
-                                     const float** a, std::int64_t* lda, const float** x,
-                                     std::int64_t* incx, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, std::int64_t* m, std::int64_t* n,
-                                     const double** a, std::int64_t* lda, const double** x,
-                                     std::int64_t* incx, double** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, std::int64_t* m, std::int64_t* n,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     const std::complex<float>** x, std::int64_t* incx,
-                                     std::complex<float>** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, std::int64_t* m, std::int64_t* n,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     const std::complex<double>** x, std::int64_t* incx,
-                                     std::complex<double>** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, float alpha, const std::complex<float>* x,
-                              std::int64_t incx, std::complex<float>* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, double alpha, const std::complex<double>* x,
-                              std::int64_t incx, std::complex<double>* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, float alpha, const std::complex<float>* x,
-                              std::int64_t incx, std::complex<float>* a,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, double alpha, const std::complex<double>* x,
-                              std::int64_t incx, std::complex<double>* a,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const float* x, std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const double* x, std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const std::complex<float>* x, std::int64_t incx,
-                                std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const std::complex<double>* x, std::int64_t incx,
-                                std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, const std::complex<float>* x,
-                               std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, const std::complex<double>* x,
-                               std::int64_t incx, std::complex<double> beta,
-                               std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, float alpha, const float* a, const float* x,
-                               std::int64_t incx, float beta, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, double alpha, const double* a, const double* x,
-                               std::int64_t incx, double beta, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1,
-                                float* d2, float* x1, float y1, float* param,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1,
-                                double* d2, double* x1, double y1, double* param,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               float* x, std::int64_t incx, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               double* x, std::int64_t incx, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<float>* x, std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<double>* x, std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const float* x, std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const double* x, std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                                float alpha, const float* a, std::int64_t lda, const float* b,
-                                std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                                double alpha, const double* a, std::int64_t lda, const double* b,
-                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                                std::complex<float> alpha, const std::complex<float>* a,
-                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose transa, transpose transb, std::int64_t n, std::int64_t k,
-                                std::complex<double> alpha, const std::complex<double>* a,
-                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double> beta, std::complex<double>* c,
-                                std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               float alpha, const float* a, std::int64_t lda, const float* b,
-                               std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               double alpha, const double* a, std::int64_t lda, const double* b,
-                               std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::half alpha, const sycl::half* a, std::int64_t lda,
-                               const sycl::half* b, std::int64_t ldb, sycl::half beta,
-                               sycl::half* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               float alpha, const sycl::half* a, std::int64_t lda,
-                               const sycl::half* b, std::int64_t ldb, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                               transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
-                               float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b,
-                               std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue,
-                                    transpose transa, transpose transb, offset offsetc, int64_t m,
-                                    int64_t n, int64_t k, float alpha, const std::int8_t* a,
-                                    int64_t lda, std::int8_t ao, const std::uint8_t* b, int64_t ldb,
-                                    std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue,
-                                    transpose transa, transpose transb, offset offsetc, int64_t m,
-                                    int64_t n, int64_t k, float alpha, const std::int8_t* a,
-                                    int64_t lda, std::int8_t ao, const std::int8_t* b, int64_t ldb,
-                                    std::int8_t bo, float beta, std::int32_t* c, int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue,
-                                    transpose transa, transpose transb, offset offsetc, int64_t m,
-                                    int64_t n, int64_t k, float alpha, const std::uint8_t* a,
-                                    int64_t lda, std::uint8_t ao, const std::int8_t* b, int64_t ldb,
-                                    std::int8_t bo, float beta, std::int32_t* c, int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue,
-                                    transpose transa, transpose transb, offset offsetc, int64_t m,
-                                    int64_t n, int64_t k, float alpha, const std::uint8_t* a,
-                                    int64_t lda, std::uint8_t ao, const std::uint8_t* b,
-                                    int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c,
-                                    int64_t ldc, const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, float alpha, const float* x, std::int64_t incx,
-                               const float* y, std::int64_t incy, float* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, double alpha, const double* x, std::int64_t incx,
-                               const double* y, std::int64_t incy, double* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                              std::int64_t n, float alpha, const float* x, std::int64_t incx,
-                              const float* y, std::int64_t incy, float* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                              std::int64_t n, double alpha, const double* x, std::int64_t incx,
-                              const double* y, std::int64_t incy, double* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, float alpha, const float* a, std::int64_t lda,
-                               float* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, double alpha, const double* a, std::int64_t lda,
-                               double* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, uplo upper_lower, transpose trans,
-                                     diag unit_diag, int64_t m, int64_t n, float alpha,
-                                     const float* a, int64_t lda, int64_t stride_a, float* b,
-                                     int64_t ldb, int64_t stride_b, int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, uplo upper_lower, transpose trans,
-                                     diag unit_diag, int64_t m, int64_t n, double alpha,
-                                     const double* a, int64_t lda, int64_t stride_a, double* b,
-                                     int64_t ldb, int64_t stride_b, int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, uplo upper_lower, transpose trans,
-                                     diag unit_diag, int64_t m, int64_t n,
-                                     std::complex<float> alpha, const std::complex<float>* a,
-                                     int64_t lda, int64_t stride_a, std::complex<float>* b,
-                                     int64_t ldb, int64_t stride_b, int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side left_right, uplo upper_lower, transpose trans,
-                                     diag unit_diag, int64_t m, int64_t n,
-                                     std::complex<double> alpha, const std::complex<double>* a,
-                                     int64_t lda, int64_t stride_a, std::complex<double>* b,
-                                     int64_t ldb, int64_t stride_b, int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, uplo* upper_lower, transpose* trans,
-                                     diag* unit_diag, int64_t* m, int64_t* n, float* alpha,
-                                     const float** a, int64_t* lda, float** b, int64_t* ldb,
-                                     int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, uplo* upper_lower, transpose* trans,
-                                     diag* unit_diag, int64_t* m, int64_t* n, double* alpha,
-                                     const double** a, int64_t* lda, double** b, int64_t* ldb,
-                                     int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, uplo* upper_lower, transpose* trans,
-                                     diag* unit_diag, int64_t* m, int64_t* n,
-                                     std::complex<float>* alpha, const std::complex<float>** a,
-                                     int64_t* lda, std::complex<float>** b, int64_t* ldb,
-                                     int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     side* left_right, uplo* upper_lower, transpose* trans,
-                                     diag* unit_diag, int64_t* m, int64_t* n,
-                                     std::complex<double>* alpha, const std::complex<double>** a,
-                                     int64_t* lda, std::complex<double>** b, int64_t* ldb,
-                                     int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               float alpha, const float* a, std::int64_t lda, const float* x,
-                               std::int64_t incx, float beta, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               double alpha, const double* a, std::int64_t lda, const double* x,
-                               std::int64_t incx, double beta, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                               std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const float* a, std::int64_t lda, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const double* a, std::int64_t lda, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n, float alpha,
-                               const float* a, std::int64_t lda, const float* b, std::int64_t ldb,
-                               float beta, float* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n, double alpha,
-                               const double* a, std::int64_t lda, const double* b, std::int64_t ldb,
-                               double beta, double* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<float>* x, std::int64_t incx,
-                               const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<double>* x, std::int64_t incx,
-                               const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, float alpha, const float* x, std::int64_t incx,
-                              float* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                              std::int64_t n, double alpha, const double* x, std::int64_t incx,
-                              double* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, float alpha, const float* a, std::int64_t lda,
-                               float* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, double alpha, const double* a, std::int64_t lda,
-                               double* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
-                               uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, float alpha, const float* a, std::int64_t lda,
-                               const float* x, std::int64_t incx, float beta, float* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, double alpha, const double* a, std::int64_t lda,
-                               const double* x, std::int64_t incx, double beta, double* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const float* a,
-                               float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const double* a,
-                               double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<float>* a, std::complex<float>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<double>* a, std::complex<double>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const float* a,
-                               std::int64_t lda, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, const double* a,
-                               std::int64_t lda, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const float* x, std::int64_t incx, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const double* x, std::int64_t incx, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, const float** x, std::int64_t* incx,
-                                     float** y, std::int64_t* incy, int64_t group_count,
-                                     int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, const double** x, std::int64_t* incx,
-                                     double** y, std::int64_t* incy, int64_t group_count,
-                                     int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, const std::complex<float>** x,
-                                     std::int64_t* incx, std::complex<float>** y,
-                                     std::int64_t* incy, int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                     std::int64_t* n, const std::complex<double>** x,
-                                     std::int64_t* incx, std::complex<double>** y,
-                                     std::int64_t* incy, int64_t group_count, int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     const float* x, std::int64_t incx, std::int64_t stridex,
-                                     float* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     const double* x, std::int64_t incx, std::int64_t stridex,
-                                     double* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     const std::complex<float>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<float>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                     const std::complex<double>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<double>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const float* x, std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const double* x, std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const std::complex<float>* x, std::int64_t incx,
-                                std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                const std::complex<double>* x, std::int64_t incx,
-                                std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::int64_t k, float alpha, const float* a,
-                               std::int64_t lda, const float* x, std::int64_t incx, float beta,
-                               float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, std::int64_t k, double alpha, const double* a,
-                               std::int64_t lda, const double* x, std::int64_t incx, double beta,
-                               double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<float>* x, std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const std::complex<double>* x, std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const float* x, std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               const double* x, std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const float* a, std::int64_t lda, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const double* a, std::int64_t lda, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               transpose trans, diag unit_diag, std::int64_t n, std::int64_t k,
-                               const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, float alpha, const float* x, std::int64_t incx,
-                               const float* y, std::int64_t incy, float* a,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                               std::int64_t n, double alpha, const double* x, std::int64_t incx,
-                               const double* y, std::int64_t incy, double* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               float* x, std::int64_t incx, float* y, std::int64_t incy,
-                               float* param, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               double* x, std::int64_t incx, double* y, std::int64_t incy,
-                               double* param, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              const float* x, std::int64_t incx, const float* y, std::int64_t incy,
-                              float* result, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              const double* x, std::int64_t incx, const double* y,
-                              std::int64_t incy, double* result,
-                              const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                              const float* x, std::int64_t incx, const float* y, std::int64_t incy,
-                              double* result, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                 float sb, const float* x, std::int64_t incx, const float* y,
-                                 std::int64_t incy, float* result,
-                                 const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<float> alpha, const std::complex<float>* a,
-                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                                float beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
-                                transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<double> alpha, const std::complex<double>* a,
-                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                                double beta, std::complex<double>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, float* a, float* b,
-                               float* c, float* s,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b,
-                               double* c, double* s,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue,
-                               std::complex<float>* a, std::complex<float>* b, float* c,
-                               std::complex<float>* s,
-                               const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue,
-                               std::complex<double>* a, std::complex<double>* b, double* c,
-                               std::complex<double>* s,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         float alpha, const float* a, std::int64_t lda,
-                                         std::int64_t stride_a, float* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         double alpha, const double* a, std::int64_t lda,
-                                         std::int64_t stride_a, double* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         std::complex<float> alpha, const std::complex<float>* a,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::complex<float>* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         std::complex<double> alpha, const std::complex<double>* a,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::complex<double>* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         float alpha, float* ab, std::int64_t lda, std::int64_t ldb,
-                                         std::int64_t stride, std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         double alpha, double* ab, std::int64_t lda,
-                                         std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         std::complex<float> alpha, std::complex<float>* ab,
-                                         std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose trans, std::int64_t m, std::int64_t n,
-                                         std::complex<double> alpha, std::complex<double>* ab,
-                                         std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                        transpose transa, transpose transb, std::int64_t m,
-                                        std::int64_t n, float alpha, const float* a,
-                                        std::int64_t lda, std::int64_t stride_a, float beta,
-                                        const float* b, std::int64_t ldb, std::int64_t stride_b,
-                                        float* c, std::int64_t ldc, std::int64_t stride_c,
-                                        std::int64_t batch_size,
-                                        const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                        transpose transa, transpose transb, std::int64_t m,
-                                        std::int64_t n, double alpha, const double* a,
-                                        std::int64_t lda, std::int64_t stride_a, double beta,
-                                        const double* b, std::int64_t ldb, std::int64_t stride_b,
-                                        double* c, std::int64_t ldc, std::int64_t stride_c,
-                                        std::int64_t batch_size,
-                                        const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-    std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
-    std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, const std::complex<float>* b,
-    std::int64_t ldb, std::int64_t stride_b, std::complex<float>* c, std::int64_t ldc,
-    std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-    std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
-    std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
-    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<double>* c,
-    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, float alpha, const float* a,
-                                   std::int64_t lda, float* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, double alpha, const double* a,
-                                   std::int64_t lda, double* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                   const std::complex<float>* a, std::int64_t lda,
-                                   std::complex<float>* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                   const std::complex<double>* a, std::int64_t lda,
-                                   std::complex<double>* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                    std::int64_t m, std::int64_t n, float alpha, const float* a,
-                                    std::int64_t lda, std::int64_t stridea, float* b,
-                                    std::int64_t ldb, std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                    std::int64_t m, std::int64_t n, double alpha, const double* a,
-                                    std::int64_t lda, std::int64_t stridea, double* b,
-                                    std::int64_t ldb, std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                    const std::complex<float>* a, std::int64_t lda,
-                                    std::int64_t stridea, std::complex<float>* b, std::int64_t ldb,
-                                    std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                    const std::complex<double>* a, std::int64_t lda,
-                                    std::int64_t stridea, std::complex<double>* b, std::int64_t ldb,
-                                    std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, float alpha, float* ab,
-                                   std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, double alpha, double* ab,
-                                   std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                   std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
-                                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                   std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                  transpose transb, std::int64_t m, std::int64_t n, float alpha,
-                                  const float* a, std::int64_t lda, float beta, const float* b,
-                                  std::int64_t ldb, float* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                  transpose transb, std::int64_t m, std::int64_t n, double alpha,
-                                  const double* a, std::int64_t lda, double beta, const double* b,
-                                  std::int64_t ldb, double* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                  transpose transb, std::int64_t m, std::int64_t n,
-                                  std::complex<float> alpha, const std::complex<float>* a,
-                                  std::int64_t lda, std::complex<float> beta,
-                                  const std::complex<float>* b, std::int64_t ldb,
-                                  std::complex<float>* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
-                                  transpose transb, std::int64_t m, std::int64_t n,
-                                  std::complex<double> alpha, const std::complex<double>* a,
-                                  std::int64_t lda, std::complex<double> beta,
-                                  const std::complex<double>* b, std::int64_t ldb,
-                                  std::complex<double>* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         float* alpha, const float** a, std::int64_t* lda,
-                                         float** b, std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         double* alpha, const double** a, std::int64_t* lda,
-                                         double** b, std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         std::complex<float>* alpha, const std::complex<float>** a,
-                                         std::int64_t* lda, std::complex<float>** b,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         std::complex<double>* alpha,
-                                         const std::complex<double>** a, std::int64_t* lda,
-                                         std::complex<double>** b, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         float* alpha, float** ab, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         double* alpha, double** ab, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         std::complex<float>* alpha, std::complex<float>** ab,
-                                         std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         transpose* trans, std::int64_t* m, std::int64_t* n,
-                                         std::complex<double>* alpha, std::complex<double>** ab,
-                                         std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
diff --git a/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx b/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx
deleted file mode 100644
index ef0db5b09..000000000
--- a/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx
+++ /dev/null
@@ -1,2853 +0,0 @@
-/*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-// Buffer APIs
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
-                        sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
-                        std::int64_t lda, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
-                        sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
-                        std::int64_t lda, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
-                        float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                        oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                        std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
-                        sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                        std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                        std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void hemm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                        std::int64_t ldc);
-
-ONEMKL_EXPORT void hemm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-                        sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-                        sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, std::complex<float> beta,
-                        sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                              oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                              float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                              oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                              double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                              oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                              oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
-                        sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, double beta,
-                        sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha,
-                         sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                         std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha,
-                         sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                         std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
-                         std::int64_t ldb, std::complex<float> beta,
-                         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                         std::int64_t ldb, std::complex<double> beta,
-                         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
-                         std::int64_t ldb, float beta, sycl::buffer<std::complex<float>, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                         std::int64_t ldb, double beta, sycl::buffer<std::complex<double>, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                        oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b,
-                        std::int64_t ldb);
-
-ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                              std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                              std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, float beta,
-                              sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                              std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                              std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, double beta,
-                              sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                              std::int64_t n, std::complex<float> alpha,
-                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                              std::int64_t n, std::complex<double> alpha,
-                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                              std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
-                              std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<float, 1>& c, std::int64_t ldc,
-                              std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
-                              std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
-                              std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<double, 1>& c, std::int64_t ldc,
-                              std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
-                              std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
-                              std::int64_t lda, std::int64_t stridea,
-                              sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c,
-                              std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
-                              std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
-                              std::int64_t lda, std::int64_t stridea,
-                              sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c,
-                              std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                        std::int64_t n, std::int64_t kl, std::int64_t ku,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha,
-                       sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-                       std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
-                       sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                       std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::int64_t k, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::int64_t k, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, std::complex<float> beta,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, std::complex<double> beta,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<float>, 1>& a);
-
-ONEMKL_EXPORT void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                       sycl::buffer<std::complex<double>, 1>& a);
-
-ONEMKL_EXPORT void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a);
-
-ONEMKL_EXPORT void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<double>, 1>& a);
-
-ONEMKL_EXPORT void sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-                        sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                        sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                       sycl::buffer<float, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                       sycl::buffer<double, 1>& a, std::int64_t lda);
-
-ONEMKL_EXPORT void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a,
-                        std::int64_t lda);
-
-ONEMKL_EXPORT void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                       sycl::buffer<float, 1>& a);
-
-ONEMKL_EXPORT void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                       double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                       sycl::buffer<double, 1>& a);
-
-ONEMKL_EXPORT void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
-                        sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a);
-
-ONEMKL_EXPORT void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                        double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& a);
-
-ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<float, 1>& a, std::int64_t lda,
-                        sycl::buffer<float, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<double, 1>& a, std::int64_t lda,
-                        sycl::buffer<double, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        std::int64_t k, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-                        std::int64_t incx);
-
-ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-                        std::int64_t incx);
-
-ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-                        std::int64_t incx);
-
-ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-                        std::int64_t incx);
-
-ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& result);
-
-ONEMKL_EXPORT void dotc(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& result);
-
-ONEMKL_EXPORT void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy, sycl::buffer<std::complex<float>, 1>& result);
-
-ONEMKL_EXPORT void dotu(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-                        sycl::buffer<std::complex<double>, 1>& result);
-
-ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                         std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                         std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                         std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                         std::int64_t incx, sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                         sycl::buffer<std::int64_t, 1>& result);
-
-ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha,
-                              sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha,
-                              sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
-                              sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                              sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                              sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-                         std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
-                         std::int64_t incy);
-
-ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, double alpha,
-                         sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-                         sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-                         std::int64_t incy);
-
-ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-                         std::int64_t incy);
-
-ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, sycl::buffer<float, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                              std::int64_t incx, std::int64_t stridex, sycl::buffer<double, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n,
-                              sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                              std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
-                              std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                       std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-                       sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                       std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
-                       sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer<float, 1>& x,
-                          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-                          sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                       std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-                       sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& result);
-
-ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, sycl::buffer<double, 1>& result);
-
-ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                       std::int64_t incy, float c, float s);
-
-ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-                       std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y,
-                       std::int64_t incy, double c, double s);
-
-ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                       std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c,
-                       float s);
-
-ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                       std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c,
-                       double s);
-
-ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& b,
-                        sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s);
-
-ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& b,
-                        sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s);
-
-ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
-                        sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
-                        sycl::buffer<std::complex<float>, 1>& s);
-
-ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
-                        sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
-                        sycl::buffer<std::complex<double>, 1>& s);
-
-ONEMKL_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-                        sycl::buffer<float, 1>& param);
-
-ONEMKL_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
-                        sycl::buffer<double, 1>& param);
-
-ONEMKL_EXPORT void rotmg(sycl::queue& queue, sycl::buffer<float, 1>& d1, sycl::buffer<float, 1>& d2,
-                         sycl::buffer<float, 1>& x1, float y1, sycl::buffer<float, 1>& param);
-
-ONEMKL_EXPORT void rotmg(sycl::queue& queue, sycl::buffer<double, 1>& d1,
-                         sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
-                         sycl::buffer<double, 1>& param);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-                        std::int64_t incx);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha,
-                        sycl::buffer<double, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha,
-                        sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-                        std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-                        std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                        std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
-                        std::int64_t incy);
-
-ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n,
-                        sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-                        sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, float beta,
-                              sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, double beta,
-                              sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                              std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, std::complex<float> alpha,
-                              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
-                              sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, std::complex<double> alpha,
-                              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
-                              std::int64_t ldb, std::int64_t stride_b, std::complex<double> beta,
-                              sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, sycl::half beta,
-                              sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
-                              std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                              oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                              std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, float beta, sycl::buffer<std::int32_t, 1>& c,
-                              std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                              oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                              oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                              float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                              oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                              oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                              double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                              std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                              oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                              oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                              oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                              oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                              std::int64_t lda, std::int64_t stride_a,
-                              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                              std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                         std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
-                         std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
-                         sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                         std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
-                         std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                         double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                         std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                         std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
-                         std::int64_t ldc);
-
-ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                             oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                             std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                             sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
-                             sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
-                             sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
-                             sycl::buffer<int32_t, 1>& co);
-
-ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                             oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                             std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                             sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
-                             sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
-                             sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
-                             sycl::buffer<int32_t, 1>& co);
-
-ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                             oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                             std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                             sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
-                             sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
-                             sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
-                             sycl::buffer<int32_t, 1>& co);
-
-ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                             oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                             std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                             sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
-                             sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
-                             sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
-                             sycl::buffer<int32_t, 1>& co);
-
-ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                                  std::int64_t lda, std::int64_t stride_a,
-                                  sycl::buffer<float, 1>& b, std::int64_t ldb,
-                                  std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                                  std::int64_t lda, std::int64_t stride_a,
-                                  sycl::buffer<double, 1>& b, std::int64_t ldb,
-                                  std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, std::complex<float> alpha,
-                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, std::complex<double> alpha,
-                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                                  std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
-                                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-
-ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
-                                  std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                  std::int64_t batch_size);
-
-ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, double alpha, sycl::buffer<double, 1>& ab,
-                                  std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                  std::int64_t batch_size);
-
-ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, std::complex<float> alpha,
-                                  sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
-                                  std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
-
-ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                  std::int64_t n, std::complex<double> alpha,
-                                  sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
-                                  std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                 float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                                 std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
-                                 std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
-                                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                 double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                                 std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
-                                 std::int64_t ldb, std::int64_t stride_b,
-                                 sycl::buffer<double, 1>& c, std::int64_t ldc,
-                                 std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                                 std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
-                                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                                 std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
-                                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                 std::complex<double> alpha,
-                                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                                 std::int64_t stride_a, std::complex<double> beta,
-                                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                                 std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
-                                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, std::complex<float> alpha,
-                            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, std::complex<double> alpha,
-                            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                             std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                             std::int64_t lda, std::int64_t stridea, sycl::buffer<float, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-
-ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                             std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                             std::int64_t lda, std::int64_t stridea, sycl::buffer<double, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-
-ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                             std::int64_t n, std::complex<float> alpha,
-                             sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-                             std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-
-ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                             std::int64_t n, std::complex<double> alpha,
-                             sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                             std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
-                             std::int64_t ldb, std::int64_t strideb);
-
-ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
-                            std::int64_t lda, std::int64_t ldb);
-
-ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, double alpha, sycl::buffer<double, 1>& ab,
-                            std::int64_t lda, std::int64_t ldb);
-
-ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, std::complex<float> alpha,
-                            sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
-                            std::int64_t ldb);
-
-ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                            std::int64_t n, std::complex<double> alpha,
-                            sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
-                            std::int64_t ldb);
-
-ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                           oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-                           sycl::buffer<float, 1>& b, std::int64_t ldb, sycl::buffer<float, 1>& c,
-                           std::int64_t ldc);
-
-ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                           oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-                           sycl::buffer<double, 1>& b, std::int64_t ldb, sycl::buffer<double, 1>& c,
-                           std::int64_t ldc);
-
-ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                           oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                           std::int64_t lda, std::complex<float> beta,
-                           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                           oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                           std::int64_t lda, std::complex<double> beta,
-                           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-// USM APIs
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                               const float* b, std::int64_t ldb, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, double alpha, const double* a, std::int64_t lda,
-                               const double* b, std::int64_t ldb, double beta, double* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, sycl::half alpha, const sycl::half* a,
-                               std::int64_t lda, const sycl::half* b, std::int64_t ldb,
-                               sycl::half beta, sycl::half* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda,
-                               const sycl::half* b, std::int64_t ldb, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa,
-                               oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                               std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda,
-                               const bfloat16* b, std::int64_t ldb, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                                    std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                                    const std::int8_t* a, std::int64_t lda, std::int8_t ao,
-                                    const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
-                                    float beta, std::int32_t* c, std::int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                                    std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                                    const std::int8_t* a, std::int64_t lda, std::int8_t ao,
-                                    const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
-                                    float beta, std::int32_t* c, std::int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                                    std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                                    const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
-                                    const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
-                                    float beta, std::int32_t* c, std::int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc,
-                                    std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                                    const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
-                                    const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
-                                    float beta, std::int32_t* c, std::int64_t ldc,
-                                    const std::int32_t* co,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               float alpha, const float* a, std::int64_t lda, const float* b,
-                               std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               double alpha, const double* a, std::int64_t lda, const double* b,
-                               std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                               std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                               std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               float alpha, const float* a, std::int64_t lda, float beta, float* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               double alpha, const double* a, std::int64_t lda, double beta,
-                               double* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float> beta, std::complex<float>* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double> beta, std::complex<double>* c,
-                               std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                                     oneapi::mkl::transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, float* alpha, const float** a,
-                                     std::int64_t* lda, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                                     oneapi::mkl::transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, double* alpha, const double** a,
-                                     std::int64_t* lda, double* beta, double** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                                     oneapi::mkl::transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, std::complex<float>* alpha,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     std::complex<float>* beta, std::complex<float>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                                     oneapi::mkl::transpose* trans, std::int64_t* n,
-                                     std::int64_t* k, std::complex<double>* alpha,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     std::complex<double>* beta, std::complex<double>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                     float alpha, const float* a, std::int64_t lda,
-                                     std::int64_t stride_a, float beta, float* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                     double alpha, const double* a, std::int64_t lda,
-                                     std::int64_t stride_a, double beta, double* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                     std::complex<float> alpha, const std::complex<float>* a,
-                                     std::int64_t lda, std::int64_t stride_a,
-                                     std::complex<float> beta, std::complex<float>* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                     std::complex<double> alpha, const std::complex<double>* a,
-                                     std::int64_t lda, std::int64_t stride_a,
-                                     std::complex<double> beta, std::complex<double>* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               float alpha, const std::complex<float>* a, std::int64_t lda,
-                               float beta, std::complex<float>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                               double alpha, const std::complex<double>* a, std::int64_t lda,
-                               double beta, std::complex<double>* c, std::int64_t ldc,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                float alpha, const float* a, std::int64_t lda, const float* b,
-                                std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                double alpha, const double* a, std::int64_t lda, const double* b,
-                                std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<float> alpha, const std::complex<float>* a,
-                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<double> alpha, const std::complex<double>* a,
-                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double> beta, std::complex<double>* c,
-                                std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<float> alpha, const std::complex<float>* a,
-                                std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
-                                float beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
-                                std::complex<double> alpha, const std::complex<double>* a,
-                                std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
-                                double beta, std::complex<double>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               float alpha, const float* a, std::int64_t lda, float* b,
-                               std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               double alpha, const double* a, std::int64_t lda, double* b,
-                               std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               float alpha, const float* a, std::int64_t lda, float* b,
-                               std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               double alpha, const double* a, std::int64_t lda, double* b,
-                               std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right,
-                               oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                               oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                     oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                                     float alpha, const float* a, std::int64_t lda,
-                                     std::int64_t stride_a, float* b, std::int64_t ldb,
-                                     std::int64_t stride_b, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                     oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                                     double alpha, const double* a, std::int64_t lda,
-                                     std::int64_t stride_a, double* b, std::int64_t ldb,
-                                     std::int64_t stride_b, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                     oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                                     std::complex<float> alpha, const std::complex<float>* a,
-                                     std::int64_t lda, std::int64_t stride_a,
-                                     std::complex<float>* b, std::int64_t ldb,
-                                     std::int64_t stride_b, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                     oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-                                     std::complex<double> alpha, const std::complex<double>* a,
-                                     std::int64_t lda, std::int64_t stride_a,
-                                     std::complex<double>* b, std::int64_t ldb,
-                                     std::int64_t stride_b, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                                     oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
-                                     float* alpha, const float** a, std::int64_t* lda, float** b,
-                                     std::int64_t* ldb, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                                     oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
-                                     double* alpha, const double** a, std::int64_t* lda, double** b,
-                                     std::int64_t* ldb, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                                     oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
-                                     std::complex<float>* alpha, const std::complex<float>** a,
-                                     std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                                     oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
-                                     std::complex<double>* alpha, const std::complex<double>** a,
-                                     std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, float alpha, const float* a, std::int64_t lda,
-                               const float* x, std::int64_t incx, float beta, float* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, double alpha, const double* a, std::int64_t lda,
-                               const double* x, std::int64_t incx, double beta, double* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                     std::int64_t m, std::int64_t n, float alpha, const float* a,
-                                     std::int64_t lda, std::int64_t stridea, const float* x,
-                                     std::int64_t incx, std::int64_t stridex, float beta, float* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                     std::int64_t m, std::int64_t n, double alpha, const double* a,
-                                     std::int64_t lda, std::int64_t stridea, const double* x,
-                                     std::int64_t incx, std::int64_t stridex, double beta,
-                                     double* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-    std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
-    const std::complex<float>* x, std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
-    std::complex<float>* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-    std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
-    std::int64_t stridea, const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
-    std::complex<double> beta, std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
-    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                     std::int64_t* m, std::int64_t* n, float* alpha,
-                                     const float** a, std::int64_t* lda, const float** x,
-                                     std::int64_t* incx, float* beta, float** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                     std::int64_t* m, std::int64_t* n, double* alpha,
-                                     const double** a, std::int64_t* lda, const double** x,
-                                     std::int64_t* incx, double* beta, double** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                     std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     const std::complex<float>** x, std::int64_t* incx,
-                                     std::complex<float>* beta, std::complex<float>** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                     std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     const std::complex<double>** x, std::int64_t* incx,
-                                     std::complex<double>* beta, std::complex<double>** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     std::int64_t m, std::int64_t n, const float* a,
-                                     std::int64_t lda, std::int64_t stridea, const float* x,
-                                     std::int64_t incx, std::int64_t stridex, float* c,
-                                     std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     std::int64_t m, std::int64_t n, const double* a,
-                                     std::int64_t lda, std::int64_t stridea, const double* x,
-                                     std::int64_t incx, std::int64_t stridex, double* c,
-                                     std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     std::int64_t m, std::int64_t n, const std::complex<float>* a,
-                                     std::int64_t lda, std::int64_t stridea,
-                                     const std::complex<float>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
-                                     std::int64_t stridec, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                                     std::int64_t m, std::int64_t n, const std::complex<double>* a,
-                                     std::int64_t lda, std::int64_t stridea,
-                                     const std::complex<double>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<double>* c,
-                                     std::int64_t ldc, std::int64_t stridec,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     std::int64_t* m, std::int64_t* n, const float** a,
-                                     std::int64_t* lda, const float** x, std::int64_t* incx,
-                                     float** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     std::int64_t* m, std::int64_t* n, const double** a,
-                                     std::int64_t* lda, const double** x, std::int64_t* incx,
-                                     double** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     std::int64_t* m, std::int64_t* n,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     const std::complex<float>** x, std::int64_t* incx,
-                                     std::complex<float>** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                                     std::int64_t* m, std::int64_t* n,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     const std::complex<double>** x, std::int64_t* incx,
-                                     std::complex<double>** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha,
-                               const float* a, std::int64_t lda, const float* x, std::int64_t incx,
-                               float beta, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
-                               const double* a, std::int64_t lda, const double* x,
-                               std::int64_t incx, double beta, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                               std::int64_t n, std::int64_t kl, std::int64_t ku,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha,
-                              const float* x, std::int64_t incx, const float* y, std::int64_t incy,
-                              float* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
-                              const double* x, std::int64_t incx, const double* y,
-                              std::int64_t incy, double* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::int64_t k, std::complex<float> alpha,
-                               const std::complex<float>* a, std::int64_t lda,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::int64_t k, std::complex<double> alpha,
-                               const std::complex<double>* a, std::int64_t lda,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              float alpha, const std::complex<float>* x, std::int64_t incx,
-                              std::complex<float>* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              double alpha, const std::complex<double>* x, std::int64_t incx,
-                              std::complex<double>* a, std::int64_t lda,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* a,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* a,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double> beta, std::complex<double>* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              float alpha, const std::complex<float>* x, std::int64_t incx,
-                              std::complex<float>* a,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              double alpha, const std::complex<double>* x, std::int64_t incx,
-                              std::complex<double>* a,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<float> alpha, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::complex<double> alpha, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                               const float* x, std::int64_t incx, float beta, float* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               std::int64_t k, double alpha, const double* a, std::int64_t lda,
-                               const double* x, std::int64_t incx, double beta, double* y,
-                               std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               float alpha, const float* a, std::int64_t lda, const float* x,
-                               std::int64_t incx, float beta, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               double alpha, const double* a, std::int64_t lda, const double* x,
-                               std::int64_t incx, double beta, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              float alpha, const float* x, std::int64_t incx, float* a,
-                              std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              double alpha, const double* x, std::int64_t incx, double* a,
-                              std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               float alpha, const float* x, std::int64_t incx, const float* y,
-                               std::int64_t incy, float* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               double alpha, const double* x, std::int64_t incx, const double* y,
-                               std::int64_t incy, double* a, std::int64_t lda,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               float alpha, const float* a, const float* x, std::int64_t incx,
-                               float beta, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               double alpha, const double* a, const double* x, std::int64_t incx,
-                               double beta, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              float alpha, const float* x, std::int64_t incx, float* a,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                              double alpha, const double* x, std::int64_t incx, double* a,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               float alpha, const float* x, std::int64_t incx, const float* y,
-                               std::int64_t incy, float* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
-                               double alpha, const double* x, std::int64_t incx, const double* y,
-                               std::int64_t incy, double* a,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
-                               float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
-                               double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
-                               float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
-                               double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const std::complex<float>* a,
-                               std::int64_t lda, std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, std::int64_t k, const std::complex<double>* a,
-                               std::int64_t lda, std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const float* a, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const double* a, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<float>* a, std::complex<float>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<double>* a,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const float* a, float* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const double* a, double* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<float>* a, std::complex<float>* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<double>* a,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const float* a, std::int64_t lda, float* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const double* a, std::int64_t lda, double* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const float* a, std::int64_t lda, float* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const double* a, std::int64_t lda, double* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<float>* a, std::int64_t lda,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                               oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
-                               std::int64_t n, const std::complex<double>* a, std::int64_t lda,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                               std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                               std::complex<float>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                               std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                               std::complex<double>* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                                std::int64_t incx, std::int64_t* result,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                               std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                               std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x,
-                               std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x,
-                               std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x,
-                               std::int64_t incx, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
-                               std::int64_t incx, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                               const std::complex<float>* x, std::int64_t incx,
-                               std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                               const std::complex<double>* x, std::int64_t incx,
-                               std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha,
-                                     const float** x, std::int64_t* incx, float** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha,
-                                     const double** x, std::int64_t* incx, double** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n,
-                                     std::complex<float>* alpha, const std::complex<float>** x,
-                                     std::int64_t* incx, std::complex<float>** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n,
-                                     std::complex<double>* alpha, const std::complex<double>** x,
-                                     std::int64_t* incx, std::complex<double>** y,
-                                     std::int64_t* incy, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha,
-                                     const float* x, std::int64_t incx, std::int64_t stridex,
-                                     float* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha,
-                                     const double* x, std::int64_t incx, std::int64_t stridex,
-                                     double* y, std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                                     const std::complex<float>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<float>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                                     const std::complex<double>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<double>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x,
-                                std::int64_t incx, const float beta, float* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
-                                std::int64_t incx, const double beta, double* y, std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                                const std::complex<float>* x, std::int64_t incx,
-                                const std::complex<float> beta, std::complex<float>* y,
-                                std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                                const std::complex<double>* x, std::int64_t incx,
-                                const std::complex<double> beta, std::complex<double>* y,
-                                std::int64_t incy,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x,
-                               std::int64_t incx, float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x,
-                               std::int64_t incx, double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                               std::int64_t incx, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                               std::int64_t incx, std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x,
-                                     std::int64_t* incx, float** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x,
-                                     std::int64_t* incx, double** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n,
-                                     const std::complex<float>** x, std::int64_t* incx,
-                                     std::complex<float>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n,
-                                     const std::complex<double>** x, std::int64_t* incx,
-                                     std::complex<double>** y, std::int64_t* incy,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x,
-                                     std::int64_t incx, std::int64_t stridex, float* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x,
-                                     std::int64_t incx, std::int64_t stridex, double* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n,
-                                     const std::complex<float>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<float>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n,
-                                     const std::complex<double>* x, std::int64_t incx,
-                                     std::int64_t stridex, std::complex<double>* y,
-                                     std::int64_t incy, std::int64_t stridey,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                              const float* y, std::int64_t incy, float* result,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x,
-                              std::int64_t incx, const double* y, std::int64_t incy, double* result,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x,
-                                 std::int64_t incx, const float* y, std::int64_t incy,
-                                 float* result, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                              const float* y, std::int64_t incy, double* result,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                               std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                               std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x,
-                               std::int64_t incx, float* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x,
-                               std::int64_t incx, double* result,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<float>* x,
-                              std::int64_t incx, std::complex<float>* y, std::int64_t incy, float c,
-                              float s, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<double>* x,
-                              std::int64_t incx, std::complex<double>* y, std::int64_t incy,
-                              double c, double s,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
-                              float* y, std::int64_t incy, float c, float s,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
-                              double* y, std::int64_t incy, double c, double s,
-                              const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, std::complex<float>* a, std::complex<float>* b,
-                               float* c, std::complex<float>* s,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, std::complex<double>* a, std::complex<double>* b,
-                               double* c, std::complex<double>* s,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
-                               float* y, std::int64_t incy, float* param,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
-                               double* y, std::int64_t incy, double* param,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1,
-                                float* param, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1,
-                                double* param, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x,
-                               std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha,
-                               std::complex<float>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha,
-                               std::complex<double>* x, std::int64_t incx,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx,
-                               float* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx,
-                               double* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<float>* x,
-                               std::int64_t incx, std::complex<float>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<double>* x,
-                               std::int64_t incx, std::complex<double>* y, std::int64_t incy,
-                               const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const float** a, std::int64_t* lda, const float** b,
-                                     std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, double* alpha,
-                                     const double** a, std::int64_t* lda, const double** b,
-                                     std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
-                                     const std::complex<float>** a, std::int64_t* lda,
-                                     const std::complex<float>** b, std::int64_t* ldb,
-                                     std::complex<float>* beta, std::complex<float>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
-                                     const std::complex<double>** a, std::int64_t* lda,
-                                     const std::complex<double>** b, std::int64_t* ldb,
-                                     std::complex<double>* beta, std::complex<double>** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, sycl::half* alpha,
-                                     const sycl::half** a, std::int64_t* lda, const sycl::half** b,
-                                     std::int64_t* ldb, sycl::half* beta, sycl::half** c,
-                                     std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const sycl::half** a, std::int64_t* lda, const sycl::half** b,
-                                     std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
-                                     std::int64_t group_count, std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const std::int8_t** a, std::int64_t* lda,
-                                     const std::int8_t** b, std::int64_t* ldb, float* beta,
-                                     float** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                                     oneapi::mkl::transpose* transb, std::int64_t* m,
-                                     std::int64_t* n, std::int64_t* k, float* alpha,
-                                     const std::int8_t** a, std::int64_t* lda,
-                                     const std::int8_t** b, std::int64_t* ldb, float* beta,
-                                     std::int32_t** c, std::int64_t* ldc, std::int64_t group_count,
-                                     std::int64_t* group_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                                     std::int64_t stride_a, const float* b, std::int64_t ldb,
-                                     std::int64_t stride_b, float beta, float* c, std::int64_t ldc,
-                                     std::int64_t stride_c, std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, double alpha, const double* a,
-                                     std::int64_t lda, std::int64_t stride_a, const double* b,
-                                     std::int64_t ldb, std::int64_t stride_b, double beta,
-                                     double* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(
-    sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-    const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-    const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
-    std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(
-    sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-    std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
-    const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-    std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
-    std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, sycl::half alpha, const sycl::half* a,
-                                     std::int64_t lda, std::int64_t stride_a, const sycl::half* b,
-                                     std::int64_t ldb, std::int64_t stride_b, sycl::half beta,
-                                     sycl::half* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, float alpha, const sycl::half* a,
-                                     std::int64_t lda, std::int64_t stride_a, const sycl::half* b,
-                                     std::int64_t ldb, std::int64_t stride_b, float beta, float* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, float alpha, const std::int8_t* a,
-                                     std::int64_t lda, std::int64_t stride_a, const std::int8_t* b,
-                                     std::int64_t ldb, std::int64_t stride_b, float beta, float* c,
-                                     std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                     oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                     std::int64_t k, float alpha, const std::int8_t* a,
-                                     std::int64_t lda, std::int64_t stride_a, const std::int8_t* b,
-                                     std::int64_t ldb, std::int64_t stride_b, float beta,
-                                     std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
-                                     std::int64_t batch_size,
-                                     const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                                std::int64_t n, std::int64_t k, float alpha, const float* a,
-                                std::int64_t lda, const float* b, std::int64_t ldb, float beta,
-                                float* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                                std::int64_t n, std::int64_t k, double alpha, const double* a,
-                                std::int64_t lda, const double* b, std::int64_t ldb, double beta,
-                                double* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                                std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                                const std::complex<float>* a, std::int64_t lda,
-                                const std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                                std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                                const std::complex<double>* a, std::int64_t lda,
-                                const std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double> beta, std::complex<double>* c,
-                                std::int64_t ldc,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, float alpha,
-                                         const float* a, std::int64_t lda, std::int64_t stride_a,
-                                         float* b, std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, double alpha,
-                                         const double* a, std::int64_t lda, std::int64_t stride_a,
-                                         double* b, std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                         const std::complex<float>* a, std::int64_t lda,
-                                         std::int64_t stride_a, std::complex<float>* b,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                         const std::complex<double>* a, std::int64_t lda,
-                                         std::int64_t stride_a, std::complex<double>* b,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, float alpha, float* ab,
-                                         std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, double alpha, double* ab,
-                                         std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                         std::complex<float>* ab, std::int64_t lda,
-                                         std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                         std::complex<double>* ab, std::int64_t lda,
-                                         std::int64_t ldb, std::int64_t stride,
-                                         std::int64_t batch_size,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                        oneapi::mkl::transpose transb, std::int64_t m,
-                                        std::int64_t n, float alpha, const float* a,
-                                        std::int64_t lda, std::int64_t stride_a, float beta,
-                                        const float* b, std::int64_t ldb, std::int64_t stride_b,
-                                        float* c, std::int64_t ldc, std::int64_t stride_c,
-                                        std::int64_t batch_size,
-                                        const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                        oneapi::mkl::transpose transb, std::int64_t m,
-                                        std::int64_t n, double alpha, const double* a,
-                                        std::int64_t lda, std::int64_t stride_a, double beta,
-                                        const double* b, std::int64_t ldb, std::int64_t stride_b,
-                                        double* c, std::int64_t ldc, std::int64_t stride_c,
-                                        std::int64_t batch_size,
-                                        const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd_batch(
-    sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-    std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
-    std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, const std::complex<float>* b,
-    std::int64_t ldb, std::int64_t stride_b, std::complex<float>* c, std::int64_t ldc,
-    std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd_batch(
-    sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-    std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
-    std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
-    const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b, std::complex<double>* c,
-    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, float alpha, const float* a, std::int64_t lda,
-                                   float* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, double alpha, const double* a, std::int64_t lda,
-                                   double* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, std::complex<float> alpha,
-                                   const std::complex<float>* a, std::int64_t lda,
-                                   std::complex<float>* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, std::complex<double> alpha,
-                                   const std::complex<double>* a, std::int64_t lda,
-                                   std::complex<double>* b, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                    std::int64_t m, std::int64_t n, float alpha, const float* a,
-                                    std::int64_t lda, std::int64_t stridea, float* b,
-                                    std::int64_t ldb, std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                    std::int64_t m, std::int64_t n, double alpha, const double* a,
-                                    std::int64_t lda, std::int64_t stridea, double* b,
-                                    std::int64_t ldb, std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                                    const std::complex<float>* a, std::int64_t lda,
-                                    std::int64_t stridea, std::complex<float>* b, std::int64_t ldb,
-                                    std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                                    const std::complex<double>* a, std::int64_t lda,
-                                    std::int64_t stridea, std::complex<double>* b, std::int64_t ldb,
-                                    std::int64_t strideb,
-                                    const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, float alpha, float* ab, std::int64_t lda,
-                                   std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, double alpha, double* ab, std::int64_t lda,
-                                   std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, std::complex<float> alpha,
-                                   std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
-                                   std::int64_t n, std::complex<double> alpha,
-                                   std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
-                                   const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                  oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                  float alpha, const float* a, std::int64_t lda, float beta,
-                                  const float* b, std::int64_t ldb, float* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                  oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                  double alpha, const double* a, std::int64_t lda, double beta,
-                                  const double* b, std::int64_t ldb, double* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                  oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                  std::complex<float> alpha, const std::complex<float>* a,
-                                  std::int64_t lda, std::complex<float> beta,
-                                  const std::complex<float>* b, std::int64_t ldb,
-                                  std::complex<float>* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                  oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
-                                  std::complex<double> alpha, const std::complex<double>* a,
-                                  std::int64_t lda, std::complex<double> beta,
-                                  const std::complex<double>* b, std::int64_t ldb,
-                                  std::complex<double>* c, std::int64_t ldc,
-                                  const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, float* alpha, const float** a,
-                                         std::int64_t* lda, float** b, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, double* alpha, const double** a,
-                                         std::int64_t* lda, double** b, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, std::complex<float>* alpha,
-                                         const std::complex<float>** a, std::int64_t* lda,
-                                         std::complex<float>** b, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, std::complex<double>* alpha,
-                                         const std::complex<double>** a, std::int64_t* lda,
-                                         std::complex<double>** b, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, float* alpha, float** ab,
-                                         std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, double* alpha, double** ab,
-                                         std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, std::complex<float>* alpha,
-                                         std::complex<float>** ab, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m,
-                                         std::int64_t* n, std::complex<double>* alpha,
-                                         std::complex<double>** ab, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* groupsize,
-                                         const std::vector<sycl::event>& dependencies = {});
diff --git a/include/oneapi/mkl/dft.hpp b/include/oneapi/mkl/dft.hpp
index 17ee4e042..d86952974 100644
--- a/include/oneapi/mkl/dft.hpp
+++ b/include/oneapi/mkl/dft.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,23 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_HPP_
-#define _ONEMKL_DFT_HPP_
+#ifndef ONEMATH_MKL_DFT_HPP
+#define ONEMATH_MKL_DFT_HPP
 
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
-#include <complex>
-#include <cstdint>
+// Deprecated header is planned to be removed late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/dft.hpp` is deprecated, please use `oneapi/math/dft.hpp` instead")
 
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
-#include "oneapi/mkl/dft/detail/dft_loader.hpp"
+#include "oneapi/math/dft.hpp"
 
-#include "oneapi/mkl/dft/descriptor.hpp"
-#include "oneapi/mkl/dft/forward.hpp"
-#include "oneapi/mkl/dft/backward.hpp"
+#include "namespace_alias.hpp"
 
-#endif // _ONEMKL_DFT_HPP_
+#endif // ONEMATH_MKL_DFT_HPP
diff --git a/include/oneapi/mkl/lapack.hpp b/include/oneapi/mkl/lapack.hpp
index f9d331e42..0a358f564 100644
--- a/include/oneapi/mkl/lapack.hpp
+++ b/include/oneapi/mkl/lapack.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2021-2022 Intel Corporation
+* Copyright 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,21 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#pragma once
+#ifndef ONEMATH_MKL_LAPACK_HPP
+#define ONEMATH_MKL_LAPACK_HPP
 
-#include "oneapi/mkl/detail/config.hpp"
+// Deprecated header is planned to be removed late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/lapack.hpp` is deprecated, please use `oneapi/math/lapack.hpp` instead")
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
-#include "oneapi/mkl/lapack/detail/mklcpu/lapack_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
-#include "oneapi/mkl/lapack/detail/mklgpu/lapack_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_CUSOLVER_BACKEND
-#include "oneapi/mkl/lapack/detail/cusolver/lapack_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_ROCSOLVER_BACKEND
-#include "oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hpp"
-#endif
+#include "oneapi/math/lapack.hpp"
 
-#include "oneapi/mkl/lapack/detail/lapack_rt.hpp"
+#include "namespace_alias.hpp"
+
+#endif // ONEMATH_MKL_LAPACK_HPP
diff --git a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx b/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx
deleted file mode 100644
index 0b1d58ba1..000000000
--- a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx
+++ /dev/null
@@ -1,2627 +0,0 @@
-/***************************************************************************
-*  Copyright (C) Codeplay Software Limited
-*  Licensed under the Apache License, Version 2.0 (the "License");
-*  you may not use this file except in compliance with the License.
-*  You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-*  For your convenience, a copy of the License has been included in this
-*  repository.
-*
-*  Unless required by applicable law or agreed to in writing, software
-*  distributed under the License is distributed on an "AS IS" BASIS,
-*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-*  See the License for the specific language governing permissions and
-*  limitations under the License.
-*
-**************************************************************************/
-
-// Buffer APIs
-
-static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tauq,
-                         sycl::buffer<std::complex<float>>& taup,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                         scratchpad, scratchpad_size);
-}
-static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                         scratchpad, scratchpad_size);
-}
-static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
-                         sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                         scratchpad, scratchpad_size);
-}
-static inline void gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tauq,
-                         sycl::buffer<std::complex<double>>& taup,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup,
-                                         scratchpad, scratchpad_size);
-}
-static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void getrs(backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void getrs(backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void getrs(backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void getrs(backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
-                         sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt,
-                         std::int64_t ldvt, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu,
-                                         vt, ldvt, scratchpad, scratchpad_size);
-}
-static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
-                         sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt,
-                         std::int64_t ldvt, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu,
-                                         vt, ldvt, scratchpad, scratchpad_size);
-}
-static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
-                         std::int64_t ldu, sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu,
-                                         vt, ldvt, scratchpad, scratchpad_size);
-}
-static inline void gesvd(backend_selector<backend::cusolver> selector, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
-                         std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
-                         std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu,
-                                         vt, ldvt, scratchpad, scratchpad_size);
-}
-static inline void heevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad,
-                                         scratchpad_size);
-}
-static inline void heevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad,
-                                         scratchpad_size);
-}
-static inline void hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb,
-                                         w, scratchpad, scratchpad_size);
-}
-static inline void hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb,
-                                         w, scratchpad, scratchpad_size);
-}
-static inline void hetrd(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void hetrd(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void hetrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void hetrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void orgbr(backend_selector<backend::cusolver> selector, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void orgbr(backend_selector<backend::cusolver> selector, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void orgtr(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void orgtr(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void ormtr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau,
-                                         c, ldc, scratchpad, scratchpad_size);
-}
-static inline void ormtr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau,
-                                         c, ldc, scratchpad, scratchpad_size);
-}
-static inline void ormrq(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void ormrq(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void ormqr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void ormqr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potri(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potri(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potri(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potri(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                         scratchpad_size);
-}
-static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void potrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                         scratchpad, scratchpad_size);
-}
-static inline void syevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& w,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad,
-                                         scratchpad_size);
-}
-static inline void syevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad,
-                                         scratchpad_size);
-}
-static inline void sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& w,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb,
-                                         w, scratchpad, scratchpad_size);
-}
-static inline void sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb,
-                                         w, scratchpad, scratchpad_size);
-}
-static inline void sytrd(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void sytrd(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void sytrf(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad,
-                                         scratchpad_size);
-}
-static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
-                                         b, ldb, scratchpad, scratchpad_size);
-}
-static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
-                                         b, ldb, scratchpad, scratchpad_size);
-}
-static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
-                                         b, ldb, scratchpad, scratchpad_size);
-}
-static inline void trtrs(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda,
-                                         b, ldb, scratchpad, scratchpad_size);
-}
-static inline void ungbr(backend_selector<backend::cusolver> selector, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void ungbr(backend_selector<backend::cusolver> selector, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                         scratchpad, scratchpad_size);
-}
-static inline void ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void ungtr(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void ungtr(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad,
-                                         scratchpad_size);
-}
-static inline void unmrq(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void unmrq(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void unmqr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void unmqr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c,
-                                         ldc, scratchpad, scratchpad_size);
-}
-static inline void unmtr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau,
-                                         c, ldc, scratchpad, scratchpad_size);
-}
-static inline void unmtr(backend_selector<backend::cusolver> selector, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau,
-                                         c, ldc, scratchpad, scratchpad_size);
-}
-static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<float>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<double>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getrs_batch(backend_selector<backend::cusolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                               stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void getrs_batch(backend_selector<backend::cusolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                               stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void getrs_batch(backend_selector<backend::cusolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                               stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void getrs_batch(backend_selector<backend::cusolver> selector,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                               stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv,
-                                               stride_ipiv, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void potrf_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void potrf_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void potrf_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void potrf_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a,
-                                               batch_size, scratchpad, scratchpad_size);
-}
-static inline void potrs_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                               stride_a, b, ldb, stride_b, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void potrs_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                               stride_a, b, ldb, stride_b, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void potrs_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                               stride_a, b, ldb, stride_b, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void potrs_batch(backend_selector<backend::cusolver> selector, oneapi::mkl::uplo uplo,
-                               std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                               stride_a, b, ldb, stride_b, batch_size, scratchpad,
-                                               scratchpad_size);
-}
-static inline void ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-static inline void ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                               std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size) {
-    oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau,
-                                               stride_tau, batch_size, scratchpad, scratchpad_size);
-}
-
-// USM APIs
-
-static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d,
-                                float* e, std::complex<float>* tauq, std::complex<float>* taup,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
-                                                taup, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* d, double* e,
-                                double* tauq, double* taup, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
-                                                taup, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* d, float* e,
-                                float* tauq, float* taup, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
-                                                taup, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event gebrd(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* d, double* e, std::complex<double>* tauq,
-                                std::complex<double>* taup, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq,
-                                                taup, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event gerqf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrf(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event getri(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                                b, ldb, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
-                                std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                                b, ldb, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
-                                std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                                b, ldb, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv,
-                                                b, ldb, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
-                                std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
-                                std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event gesvd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* s, std::complex<double>* u, std::int64_t ldu,
-                                std::complex<double>* vt, std::int64_t ldvt,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s,
-                                                u, ldu, vt, ldvt, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event heevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event heevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                                b, ldb, w, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event hegvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                                b, ldb, w, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event hetrd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, float* d, float* e, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event hetrd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, double* d, double* e, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event hetrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event hetrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgbr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgbr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ormtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                                lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ormtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                                lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ormrq(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ormrq(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ormqr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ormqr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potri(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potri(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potri(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potri(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad,
-                                                scratchpad_size, dependencies);
-}
-static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event syevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* w, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event syevd(backend_selector<backend::cusolver> selector, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* w, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                                b, ldb, w, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event sygvd(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda,
-                                                b, ldb, w, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event sytrd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* d, double* e, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sytrd(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* d, float* e, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event sytrf(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                                lda, b, ldb, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                                lda, b, ldb, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                                lda, b, ldb, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event trtrs(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a,
-                                                lda, b, ldb, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event ungbr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungbr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau,
-                                                scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event unmrq(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event unmrq(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event unmqr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event unmqr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda,
-                                                tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event unmtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                                lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event unmtr(backend_selector<backend::cusolver> selector,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a,
-                                                lda, tau, c, ldc, scratchpad, scratchpad_size,
-                                                dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      tau, stride_tau, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      tau, stride_tau, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      tau, stride_tau, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      tau, stride_tau, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event geqrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, float** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, double** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrf_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                      float* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t* ipiv, std::int64_t stride_ipiv,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                      double* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t* ipiv, std::int64_t stride_ipiv,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a,
-                                                      ipiv, stride_ipiv, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getri_batch(backend_selector<backend::cusolver> selector, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
-                                      std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(
-        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-        batch_size, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
-                                      std::int64_t nrhs, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(
-        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-        batch_size, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(
-    backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size, std::complex<float>* scratchpad,
-    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(
-        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-        batch_size, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(
-    backend_selector<backend::cusolver> selector, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size, std::complex<double>* scratchpad,
-    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(
-        selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b,
-        batch_size, scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, float** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, float** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, group_count, group_sizes,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, double** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, double** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, group_count, group_sizes,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, group_count, group_sizes,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event getrs_batch(
-    backend_selector<backend::cusolver> selector, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
-    std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-    const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda,
-                                                      ipiv, b, ldb, group_count, group_sizes,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                      stride_a, tau, stride_tau, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                      stride_a, tau, stride_tau, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::int64_t* k, float** a,
-                                      std::int64_t* lda, float** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event orgqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::int64_t* k, double** a,
-                                      std::int64_t* lda, double** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      stride_a, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      stride_a, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      stride_a, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      stride_a, batch_size, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrf_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      float* a, std::int64_t lda, std::int64_t stride_a, float* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      stride_a, b, ldb, stride_b, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      double* a, std::int64_t lda, std::int64_t stride_a, double* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      stride_a, b, ldb, stride_b, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      stride_a, b, ldb, stride_b, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      stride_a, b, ldb, stride_b, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event potrs_batch(backend_selector<backend::cusolver> selector,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda,
-                                                      b, ldb, group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::complex<float>* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                      stride_a, tau, stride_tau, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                      std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::complex<double>* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda,
-                                                      stride_a, tau, stride_tau, batch_size,
-                                                      scratchpad, scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::int64_t* k, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-static inline sycl::event ungqr_batch(backend_selector<backend::cusolver> selector, std::int64_t* m,
-                                      std::int64_t* n, std::int64_t* k, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {}) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau,
-                                                      group_count, group_sizes, scratchpad,
-                                                      scratchpad_size, dependencies);
-}
-
-// SCRATCHPAD APIs
-template <typename fp_type>
-std::int64_t gebrd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::gebrd_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         lda);
-}
-template <typename fp_type>
-std::int64_t gerqf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::gerqf_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         lda);
-}
-template <typename fp_type>
-std::int64_t geqrf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::geqrf_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         lda);
-}
-template <typename fp_type>
-std::int64_t gesvd_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldu, std::int64_t ldvt) {
-    return oneapi::mkl::lapack::cusolver::gesvd_scratchpad_size<fp_type>(
-        selector.get_queue(), jobu, jobvt, m, n, lda, ldu, ldvt);
-}
-template <typename fp_type>
-std::int64_t getrf_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::getrf_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         lda);
-}
-template <typename fp_type>
-std::int64_t getri_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t n,
-                                   std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::getri_scratchpad_size<fp_type>(selector.get_queue(), n,
-                                                                         lda);
-}
-template <typename fp_type>
-std::int64_t getrs_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::cusolver::getrs_scratchpad_size<fp_type>(selector.get_queue(),
-                                                                         trans, n, nrhs, lda, ldb);
-}
-template <typename fp_type>
-std::int64_t heevd_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::heevd_scratchpad_size<fp_type>(selector.get_queue(), jobz,
-                                                                         uplo, n, lda);
-}
-template <typename fp_type>
-std::int64_t hegvd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::cusolver::hegvd_scratchpad_size<fp_type>(
-        selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
-}
-template <typename fp_type>
-std::int64_t hetrd_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::hetrd_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t hetrf_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::hetrf_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t orgbr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::orgbr_scratchpad_size<fp_type>(selector.get_queue(), vect,
-                                                                         m, n, k, lda);
-}
-template <typename fp_type>
-std::int64_t orgtr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::orgtr_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t orgqr_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::orgqr_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         k, lda);
-}
-template <typename fp_type>
-std::int64_t ormrq_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::ormrq_scratchpad_size<fp_type>(selector.get_queue(), side,
-                                                                         trans, m, n, k, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t ormqr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::ormqr_scratchpad_size<fp_type>(selector.get_queue(), side,
-                                                                         trans, m, n, k, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t ormtr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::ormtr_scratchpad_size<fp_type>(
-        selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t potrf_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::potrf_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t potrs_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::cusolver::potrs_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, nrhs, lda, ldb);
-}
-template <typename fp_type>
-std::int64_t potri_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::potri_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t sytrf_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::sytrf_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t syevd_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::syevd_scratchpad_size<fp_type>(selector.get_queue(), jobz,
-                                                                         uplo, n, lda);
-}
-template <typename fp_type>
-std::int64_t sygvd_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t itype,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::cusolver::sygvd_scratchpad_size<fp_type>(
-        selector.get_queue(), itype, jobz, uplo, n, lda, ldb);
-}
-template <typename fp_type>
-std::int64_t sytrd_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::sytrd_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t trtrs_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb) {
-    return oneapi::mkl::lapack::cusolver::trtrs_scratchpad_size<fp_type>(
-        selector.get_queue(), uplo, trans, diag, n, nrhs, lda, ldb);
-}
-template <typename fp_type>
-std::int64_t ungbr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::ungbr_scratchpad_size<fp_type>(selector.get_queue(), vect,
-                                                                         m, n, k, lda);
-}
-template <typename fp_type>
-std::int64_t ungqr_scratchpad_size(backend_selector<backend::cusolver> selector, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::ungqr_scratchpad_size<fp_type>(selector.get_queue(), m, n,
-                                                                         k, lda);
-}
-template <typename fp_type>
-std::int64_t ungtr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) {
-    return oneapi::mkl::lapack::cusolver::ungtr_scratchpad_size<fp_type>(selector.get_queue(), uplo,
-                                                                         n, lda);
-}
-template <typename fp_type>
-std::int64_t unmrq_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::unmrq_scratchpad_size<fp_type>(selector.get_queue(), side,
-                                                                         trans, m, n, k, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t unmqr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::unmqr_scratchpad_size<fp_type>(selector.get_queue(), side,
-                                                                         trans, m, n, k, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t unmtr_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldc) {
-    return oneapi::mkl::lapack::cusolver::unmtr_scratchpad_size<fp_type>(
-        selector.get_queue(), side, uplo, trans, m, n, lda, ldc);
-}
-template <typename fp_type>
-std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t m, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, lda, stride_a, stride_ipiv, batch_size);
-}
-template <typename fp_type>
-std::int64_t getri_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), n, lda, stride_a, stride_ipiv, batch_size);
-}
-template <typename fp_type>
-std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::transpose trans, std::int64_t n,
-                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b,
-        batch_size);
-}
-template <typename fp_type>
-std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t m, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_tau,
-                                         std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, lda, stride_a, stride_tau, batch_size);
-}
-template <typename fp_type>
-std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), uplo, n, lda, stride_a, batch_size);
-}
-template <typename fp_type>
-std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                         std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
-}
-template <typename fp_type>
-std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t m, std::int64_t n, std::int64_t k,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
-}
-template <typename fp_type>
-std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t m, std::int64_t n, std::int64_t k,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size);
-}
-template <typename fp_type>
-std::int64_t getrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, lda, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t getri_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), n, lda, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t getrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::transpose* trans, std::int64_t* n,
-                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t geqrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, lda, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t orgqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, k, lda, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t potrf_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), uplo, n, lda, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t potrs_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes);
-}
-template <typename fp_type>
-std::int64_t ungqr_batch_scratchpad_size(backend_selector<backend::cusolver> selector,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes) {
-    return oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<fp_type>(
-        selector.get_queue(), m, n, k, lda, group_count, group_sizes);
-}
diff --git a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx b/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx
deleted file mode 100644
index f0de843fe..000000000
--- a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx
+++ /dev/null
@@ -1,1830 +0,0 @@
-/***************************************************************************
-*  Copyright (C) Codeplay Software Limited
-*  Licensed under the Apache License, Version 2.0 (the "License");
-*  you may not use this file except in compliance with the License.
-*  You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-*  For your convenience, a copy of the License has been included in this
-*  repository.
-*
-*  Unless required by applicable law or agreed to in writing, software
-*  distributed under the License is distributed on an "AS IS" BASIS,
-*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-*  See the License for the specific language governing permissions and
-*  limitations under the License.
-*
-**************************************************************************/
-
-// Buffer APIs
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tauq,
-                         sycl::buffer<std::complex<float>>& taup,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tauq,
-                         sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<float>& tauq, sycl::buffer<float>& taup,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tauq,
-                         sycl::buffer<std::complex<double>>& taup,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
-                         sycl::buffer<double>& vt, std::int64_t ldvt,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
-                         sycl::buffer<float>& vt, std::int64_t ldvt,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<float>& s,
-                         sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<double>& s,
-                         sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
-                         sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-// USM APIs
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tauq, std::complex<float>* taup,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* d, double* e, double* tauq, double* taup,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* d, float* e, float* tauq, float* taup,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tauq, std::complex<double>* taup,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
-                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu,
-                                double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt,
-                                std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* s,
-                                std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
-                                std::int64_t ldvt, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, float* b,
-                                std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, double* b,
-                                std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, double* a, std::int64_t lda, double* w,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, float* a, std::int64_t lda, float* w,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* w, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* w, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* d, double* e, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* d, float* e, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, float* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, double* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<double>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      float* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      double* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, double** a, std::int64_t* lda, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-// SCRATCHPAD APIs
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                 oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
-                                                 std::int64_t ldvt);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                 std::int64_t n, std::int64_t nrhs,
-                                                 std::int64_t lda, std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
-                                                 oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect,
-                                                 std::int64_t m, std::int64_t n, std::int64_t k,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t k, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t nrhs,
-                                                 std::int64_t lda, std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
-                                                 oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans,
-                                                 oneapi::mkl::diag diag, std::int64_t n,
-                                                 std::int64_t nrhs, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect,
-                                                 std::int64_t m, std::int64_t n, std::int64_t k,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t k, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t stride_ipiv,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_ipiv,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                       std::int64_t n, std::int64_t nrhs,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t ldb, std::int64_t stride_b,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t k,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t k,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* k,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                                       std::int64_t* n, std::int64_t* nrhs,
-                                                       std::int64_t* lda, std::int64_t* ldb,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* k,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
diff --git a/include/oneapi/mkl/lapack/detail/lapack_loader.hpp b/include/oneapi/mkl/lapack/detail/lapack_loader.hpp
deleted file mode 100644
index fc5b3d70e..000000000
--- a/include/oneapi/mkl/lapack/detail/lapack_loader.hpp
+++ /dev/null
@@ -1,2382 +0,0 @@
-/*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-#pragma once
-
-#include <complex>
-#include <cstdint>
-
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
-
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/detail/export.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
-
-namespace oneapi {
-namespace mkl {
-namespace lapack {
-namespace detail {
-
-ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tauq,
-                         sycl::buffer<std::complex<float>>& taup,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tauq,
-                         sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tauq,
-                         sycl::buffer<std::complex<double>>& taup,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
-                         sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt,
-                         std::int64_t ldvt, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
-                         sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt,
-                         std::int64_t ldvt, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u,
-                         std::int64_t ldu, sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                         oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u,
-                         std::int64_t ldu, sycl::buffer<std::complex<double>>& vt,
-                         std::int64_t ldvt, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& w,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& w,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                         oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<float>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<double>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
-                               std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                               std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d,
-                                float* e, std::complex<float>* tauq, std::complex<float>* taup,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* d, double* e,
-                                double* tauq, double* taup, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* d, float* e,
-                                float* tauq, float* taup, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* d, double* e, std::complex<double>* tauq,
-                                std::complex<double>* taup, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
-                                std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
-                                std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
-                                std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
-                                std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* s, std::complex<double>* u, std::int64_t ldu,
-                                std::complex<double>* vt, std::int64_t ldvt,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* w,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* w,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, float* d, float* e, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, double* d, double* e, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                                std::int64_t lda, double* tau, double* c, std::int64_t ldc,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                                std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* w, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* w, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* d, double* e, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* d, float* e, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                                std::int64_t ldb, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
-                                std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                                std::int64_t ldb, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue,
-                                oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::complex<float>* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::complex<double>* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, float** a,
-                                      std::int64_t* lda, float** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, double** a,
-                                      std::int64_t* lda, double** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t n, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t n, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* n, float** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* n, double** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
-                                      std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::transpose trans, std::int64_t n,
-                                      std::int64_t nrhs, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size, std::complex<float>* scratchpad,
-    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size, std::complex<double>* scratchpad,
-    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, float** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, float** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, double** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, double** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::transpose* trans, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
-    std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-    const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, float* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, double* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a,
-                                      std::int64_t* lda, float** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a,
-                                      std::int64_t* lda, double** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      float* a, std::int64_t lda, std::int64_t stride_a, float* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      double* a, std::int64_t lda, std::int64_t stride_a, double* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t m, std::int64_t n, std::int64_t k,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::complex<double>* a, std::int64_t lda, std::int64_t stride_a, std::complex<double>* tau,
-    std::int64_t stride_tau, std::int64_t batch_size, std::complex<double>* scratchpad,
-    std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                                      std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t heevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hegvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                   std::int64_t lda, std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::generate vect, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                   oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t m, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::transpose trans, std::int64_t n,
-                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t m, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_tau,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                         std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t m, std::int64_t n, std::int64_t k,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t m, std::int64_t n, std::int64_t k,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::transpose* trans, std::int64_t* n,
-                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                         std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue,
-                                         std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue,
-                                                        oneapi::mkl::jobsvd jobu,
-                                                        oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda,
-                                                        std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue,
-                                                         oneapi::mkl::jobsvd jobu,
-                                                         oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda,
-                                                         std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-    oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu,
-    std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-    oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu,
-    std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t n,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t n,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t n, std::int64_t nrhs,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-    std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-    std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-    oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-    oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue,
-                                                        oneapi::mkl::generate vect, std::int64_t m,
-                                                        std::int64_t n, std::int64_t k,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue,
-                                                         oneapi::mkl::generate vect, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t k,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t k, std::int64_t lda,
-                                                        std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t k, std::int64_t lda,
-                                                        std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t nrhs,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, std::int64_t itype,
-                                                        oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, std::int64_t itype,
-                                                         oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        oneapi::mkl::diag diag, std::int64_t n,
-                                                        std::int64_t nrhs, std::int64_t lda,
-                                                        std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         oneapi::mkl::diag diag, std::int64_t n,
-                                                         std::int64_t nrhs, std::int64_t lda,
-                                                         std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                                      sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                                       sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
-    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
-    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
-    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
-    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-    std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                              sycl::queue& queue, std::int64_t* m,
-                                                              std::int64_t* n, std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                               sycl::queue& queue, std::int64_t* m,
-                                                               std::int64_t* n, std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                              sycl::queue& queue, std::int64_t* n,
-                                                              std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                               sycl::queue& queue, std::int64_t* n,
-                                                               std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey,
-                                                              sycl::queue& queue, std::int64_t* m,
-                                                              std::int64_t* n, std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey,
-                                                               sycl::queue& queue, std::int64_t* m,
-                                                               std::int64_t* n, std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-    std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes);
-} //namespace detail
-} //namespace lapack
-} //namespace mkl
-} //namespace oneapi
diff --git a/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx b/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx
deleted file mode 100644
index bbc6079b7..000000000
--- a/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx
+++ /dev/null
@@ -1,2139 +0,0 @@
-/*******************************************************************************
-* Copyright 2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tauq,
-                         sycl::buffer<std::complex<float>>& taup,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tauq,
-                         sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<float>& tauq, sycl::buffer<float>& taup,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tauq,
-                         sycl::buffer<std::complex<double>>& taup,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
-                         sycl::buffer<double>& vt, std::int64_t ldvt,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
-                         sycl::buffer<float>& vt, std::int64_t ldvt,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<float>& s,
-                         sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<double>& s,
-                         sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
-                         sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tauq, std::complex<float>* taup,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* d, double* e, double* tauq, double* taup,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* d, float* e, float* tauq, float* taup,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tauq, std::complex<double>* taup,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
-                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu,
-                                double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt,
-                                std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* s,
-                                std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
-                                std::int64_t ldvt, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, float* b,
-                                std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, double* b,
-                                std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, double* a, std::int64_t lda, double* w,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, float* a, std::int64_t lda, float* w,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* w, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* w, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* d, double* e, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* d, float* e, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, float* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, double* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<double>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      float* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      double* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, double** a, std::int64_t* lda, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                   std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* k, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                         std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* k, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<float>(sycl::queue& queue,
-                                                        oneapi::mkl::jobsvd jobu,
-                                                        oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda,
-                                                        std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<double>(sycl::queue& queue,
-                                                         oneapi::mkl::jobsvd jobu,
-                                                         oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda,
-                                                         std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-    std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-    std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      std::int64_t m,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       std::int64_t m,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<float>(sycl::queue& queue, std::int64_t n,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<double>(sycl::queue& queue, std::int64_t n,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<float>(sycl::queue& queue,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t n, std::int64_t nrhs,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<double>(sycl::queue& queue,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::job jobz,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::job jobz,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-    std::int64_t n, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-    std::int64_t n, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size<float>(sycl::queue& queue,
-                                                        oneapi::mkl::generate vect, std::int64_t m,
-                                                        std::int64_t n, std::int64_t k,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size<double>(sycl::queue& queue,
-                                                         oneapi::mkl::generate vect, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                        std::int64_t n, std::int64_t k,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t k, std::int64_t lda,
-                                                        std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t k, std::int64_t lda,
-                                                        std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        std::int64_t m, std::int64_t n,
-                                                        std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t nrhs,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
-                                                        std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
-                                                         std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size<float>(sycl::queue& queue, std::int64_t itype,
-                                                        oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
-                                                        std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size<double>(sycl::queue& queue, std::int64_t itype,
-                                                         oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
-                                                         std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        oneapi::mkl::diag diag, std::int64_t n,
-                                                        std::int64_t nrhs, std::int64_t lda,
-                                                        std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         oneapi::mkl::diag diag, std::int64_t n,
-                                                         std::int64_t nrhs, std::int64_t lda,
-                                                         std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-    oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-    oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k,
-    std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                                      oneapi::mkl::uplo uplo,
-                                                                      std::int64_t n,
-                                                                      std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                                       oneapi::mkl::uplo uplo,
-                                                                       std::int64_t n,
-                                                                       std::int64_t lda);
-template <>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-    oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda,
-    std::int64_t ldc);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                              std::int64_t n, std::int64_t lda,
-                                                              std::int64_t stride_a,
-                                                              std::int64_t stride_ipiv,
-                                                              std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                               std::int64_t n, std::int64_t lda,
-                                                               std::int64_t stride_a,
-                                                               std::int64_t stride_ipiv,
-                                                               std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t n,
-                                                              std::int64_t lda,
-                                                              std::int64_t stride_a,
-                                                              std::int64_t stride_ipiv,
-                                                              std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t n,
-                                                               std::int64_t lda,
-                                                               std::int64_t stride_a,
-                                                               std::int64_t stride_ipiv,
-                                                               std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_ipiv, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m,
-                                                              std::int64_t n, std::int64_t lda,
-                                                              std::int64_t stride_a,
-                                                              std::int64_t stride_tau,
-                                                              std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m,
-                                                               std::int64_t n, std::int64_t lda,
-                                                               std::int64_t stride_a,
-                                                               std::int64_t stride_tau,
-                                                               std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a,
-    std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue,
-                                                              oneapi::mkl::uplo uplo,
-                                                              std::int64_t n, std::int64_t lda,
-                                                              std::int64_t stride_a,
-                                                              std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue,
-                                                               oneapi::mkl::uplo uplo,
-                                                               std::int64_t n, std::int64_t lda,
-                                                               std::int64_t stride_a,
-                                                               std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
-                                                              std::int64_t* n, std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
-                                                               std::int64_t* n, std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* n,
-                                                              std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* n,
-                                                               std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count,
-    std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<float>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<double>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
-                                                              std::int64_t* n, std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
-                                                               std::int64_t* n, std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
-                                                              std::int64_t* n, std::int64_t* k,
-                                                              std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
-                                                               std::int64_t* n, std::int64_t* k,
-                                                               std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue,
-                                                              oneapi::mkl::uplo* uplo,
-                                                              std::int64_t* n, std::int64_t* lda,
-                                                              std::int64_t group_count,
-                                                              std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue,
-                                                               oneapi::mkl::uplo* uplo,
-                                                               std::int64_t* n, std::int64_t* lda,
-                                                               std::int64_t group_count,
-                                                               std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<float>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<double>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
-template <>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda,
-    std::int64_t group_count, std::int64_t* group_sizes);
diff --git a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx b/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx
deleted file mode 100644
index 3b205f606..000000000
--- a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx
+++ /dev/null
@@ -1,1832 +0,0 @@
-/***************************************************************************
-*  Copyright (C) Codeplay Software Limited
-*  Copyright 2022 Intel Corporation
-*
-*  Licensed under the Apache License, Version 2.0 (the "License");
-*  you may not use this file except in compliance with the License.
-*  You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-*  For your convenience, a copy of the License has been included in this
-*  repository.
-*
-*  Unless required by applicable law or agreed to in writing, software
-*  distributed under the License is distributed on an "AS IS" BASIS,
-*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-*  See the License for the specific language governing permissions and
-*  limitations under the License.
-*
-**************************************************************************/
-
-// Buffer APIs
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tauq,
-                         sycl::buffer<std::complex<float>>& taup,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tauq,
-                         sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<float>& tauq, sycl::buffer<float>& taup,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tauq,
-                         sycl::buffer<std::complex<double>>& taup,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
-                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
-                         sycl::buffer<double>& vt, std::int64_t ldvt,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
-                         sycl::buffer<float>& vt, std::int64_t ldvt,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<float>& s,
-                         sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<double>& s,
-                         sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
-                         sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<float>& d, sycl::buffer<float>& e,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<double>& d, sycl::buffer<double>& e,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& c, std::int64_t ldc,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
-                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
-                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                         oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
-                         sycl::buffer<double>& e, sycl::buffer<double>& tau,
-                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
-                         sycl::buffer<float>& e, sycl::buffer<float>& tau,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<double>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::int64_t>& ipiv,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                         oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
-                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                         std::int64_t m, std::int64_t n, std::int64_t k,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<float>>& tau,
-                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<float>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                         sycl::buffer<std::complex<double>>& tau,
-                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
-                         sycl::buffer<std::complex<double>>& scratchpad,
-                         std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                               std::int64_t stride_ipiv, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<float>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<double>& tau, std::int64_t stride_tau,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
-                               std::int64_t lda, std::int64_t stride_a,
-                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-                               std::int64_t stride_b, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<float>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                               std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
-                               std::int64_t stride_tau, std::int64_t batch_size,
-                               sycl::buffer<std::complex<double>>& scratchpad,
-                               std::int64_t scratchpad_size);
-
-// USM APIs
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tauq, std::complex<float>* taup,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* d, double* e, double* tauq, double* taup,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* d, float* e, float* tauq, float* taup,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tauq, std::complex<double>* taup,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
-                                std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
-                                std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu,
-                                double* vt, std::int64_t ldvt, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
-                                std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt,
-                                std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* s,
-                                std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                                std::int64_t ldvt, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* s,
-                                std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
-                                std::int64_t ldvt, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                float* w, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                double* w, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                                std::complex<float>* tau, std::complex<float>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
-                                float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
-                                double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, double* a, std::int64_t lda, double* tau, double* c,
-                                std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
-                                std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, float* a, std::int64_t lda, float* b,
-                                std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, double* a, std::int64_t lda, double* b,
-                                std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, double* a, std::int64_t lda, double* w,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                std::int64_t n, float* a, std::int64_t lda, float* w,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* w, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* w, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, double* d, double* e, double* tau,
-                                double* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, float* d, float* e, float* tau,
-                                float* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
-                                double* b, std::int64_t ldb, double* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                float* b, std::int64_t ldb, float* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<float>* a,
-                                std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
-                                std::int64_t n, std::int64_t k, std::complex<double>* a,
-                                std::int64_t lda, std::complex<double>* tau,
-                                std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
-                                std::complex<float>* c, std::int64_t ldc,
-                                std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                std::complex<double>* a, std::int64_t lda,
-                                std::complex<double>* tau, std::complex<double>* c,
-                                std::int64_t ldc, std::complex<double>* scratchpad,
-                                std::int64_t scratchpad_size,
-                                const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, float* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, double* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t** ipiv, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, double* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                      std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
-                                      std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                                      std::int64_t stride_ipiv, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, float** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, double** a,
-                                      std::int64_t* lda, std::int64_t** ipiv, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                      std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::int64_t** ipiv,
-                                      std::complex<double>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* tau, std::int64_t stride_tau,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, float** a, std::int64_t* lda, float** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, double** a, std::int64_t* lda, double** tau,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      float* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      double* a, std::int64_t lda, std::int64_t stride_a,
-                                      std::int64_t batch_size, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      float** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      double** a, std::int64_t* lda, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<float>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::complex<double>** a, std::int64_t* lda,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, float* a, std::int64_t lda,
-                                      std::int64_t stride_a, float* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      float* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, double* a, std::int64_t lda,
-                                      std::int64_t stride_a, double* b, std::int64_t ldb,
-                                      std::int64_t stride_b, std::int64_t batch_size,
-                                      double* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                      std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* b,
-                                      std::int64_t ldb, std::int64_t stride_b,
-                                      std::int64_t batch_size, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, float* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, double** a, std::int64_t* lda, double** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, double* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<float>** a,
-                                      std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
-                                      std::int64_t group_count, std::int64_t* group_sizes,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                                      std::int64_t* nrhs, std::complex<double>** a,
-                                      std::int64_t* lda, std::complex<double>** b,
-                                      std::int64_t* ldb, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<float>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                      std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                                      std::int64_t stride_a, std::complex<double>* tau,
-                                      std::int64_t stride_tau, std::int64_t batch_size,
-                                      std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<float>** a, std::int64_t* lda,
-                                      std::complex<float>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                      std::int64_t* k, std::complex<double>** a, std::int64_t* lda,
-                                      std::complex<double>** tau, std::int64_t group_count,
-                                      std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                                      std::int64_t scratchpad_size,
-                                      const std::vector<sycl::event>& dependencies = {});
-
-// SCRATCHPAD APIs
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                 oneapi::mkl::jobsvd jobvt, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
-                                                 std::int64_t ldvt);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                 std::int64_t n, std::int64_t nrhs,
-                                                 std::int64_t lda, std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
-                                                 oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect,
-                                                 std::int64_t m, std::int64_t n, std::int64_t k,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t k, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t nrhs,
-                                                 std::int64_t lda, std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype,
-                                                 oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans,
-                                                 oneapi::mkl::diag diag, std::int64_t n,
-                                                 std::int64_t nrhs, std::int64_t lda,
-                                                 std::int64_t ldb);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect,
-                                                 std::int64_t m, std::int64_t n, std::int64_t k,
-                                                 std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                                 std::int64_t k, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                 std::int64_t n, std::int64_t lda);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                                 oneapi::mkl::uplo uplo,
-                                                 oneapi::mkl::transpose trans, std::int64_t m,
-                                                 std::int64_t n, std::int64_t lda,
-                                                 std::int64_t ldc);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t stride_ipiv,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_ipiv,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-    std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
-    std::int64_t stride_b, std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                       std::int64_t n, std::int64_t lda,
-                                                       std::int64_t stride_a,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                       std::int64_t n, std::int64_t nrhs,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t ldb, std::int64_t stride_b,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t k,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m,
-                                                       std::int64_t n, std::int64_t k,
-                                                       std::int64_t lda, std::int64_t stride_a,
-                                                       std::int64_t stride_tau,
-                                                       std::int64_t batch_size);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
-    std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* k,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                                       std::int64_t* n, std::int64_t* lda,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                                       std::int64_t* n, std::int64_t* nrhs,
-                                                       std::int64_t* lda, std::int64_t* ldb,
-                                                       std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
-
-template <typename T>
-ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m,
-                                                       std::int64_t* n, std::int64_t* k,
-                                                       std::int64_t* lda, std::int64_t group_count,
-                                                       std::int64_t* group_sizes);
diff --git a/include/oneapi/mkl/namespace_alias.hpp b/include/oneapi/mkl/namespace_alias.hpp
new file mode 100644
index 000000000..3b09b5867
--- /dev/null
+++ b/include/oneapi/mkl/namespace_alias.hpp
@@ -0,0 +1,34 @@
+/***************************************************************************
+*  Copyright (C) Codeplay Software Limited
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+*  For your convenience, a copy of the License has been included in this
+*  repository.
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+**************************************************************************/
+
+#ifndef ONEMATH_MKL_NAMESPACE_ALIAS_HPP
+#define ONEMATH_MKL_NAMESPACE_ALIAS_HPP
+
+// Compatibility shim: makes the contents of `oneapi::math` reachable through the
+// legacy `oneapi::mkl` name. Include it only after a header that declares
+// `oneapi::math`; the using-directive below is ill-formed otherwise.
+namespace oneapi {
+// Deprecated namespace is planned to be removed late 2025.
+namespace
+    [[deprecated("Namespace `oneapi::mkl` is deprecated, please use `oneapi::math` instead")]] mkl {
+using namespace math;
+}
+} // namespace oneapi
+
+#endif // ONEMATH_MKL_NAMESPACE_ALIAS_HPP
diff --git a/include/oneapi/mkl/rng.hpp b/include/oneapi/mkl/rng.hpp
index 5dfbdac81..be4760dad 100644
--- a/include/oneapi/mkl/rng.hpp
+++ b/include/oneapi/mkl/rng.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
+* Copyright 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,25 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RNG_HPP_
-#define _ONEMKL_RNG_HPP_
+#ifndef ONEMATH_MKL_RNG_HPP
+#define ONEMATH_MKL_RNG_HPP
 
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
-#include <complex>
-#include <cstdint>
+// Deprecated header is planned to be removed late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/rng.hpp` is deprecated, please use `oneapi/math/rng.hpp` instead")
 
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/rng.hpp"
 
-#include "oneapi/mkl/rng/predicates.hpp"
-#include "oneapi/mkl/rng/detail/rng_loader.hpp"
+#include "namespace_alias.hpp"
 
-#include "oneapi/mkl/rng/functions.hpp"
-#include "oneapi/mkl/rng/distributions.hpp"
-#include "oneapi/mkl/rng/engines.hpp"
-
-#endif // _ONEMKL_RNG_HPP_
+#endif // ONEMATH_MKL_RNG_HPP
diff --git a/include/oneapi/mkl/rng/device.hpp b/include/oneapi/mkl/rng/device.hpp
index a628395d2..9024e066b 100644
--- a/include/oneapi/mkl/rng/device.hpp
+++ b/include/oneapi/mkl/rng/device.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,12 +17,15 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _MKL_RNG_SYCL_DEVICE_HPP__
-#define _MKL_RNG_SYCL_DEVICE_HPP__
+#ifndef ONEMATH_MKL_RNG_DEVICE_HPP
+#define ONEMATH_MKL_RNG_DEVICE_HPP
 
-#include "oneapi/mkl/rng/device/types.hpp"
-#include "oneapi/mkl/rng/device/functions.hpp"
-#include "oneapi/mkl/rng/device/distributions.hpp"
-#include "oneapi/mkl/rng/device/engines.hpp"
+// Deprecated header is planned to be removed late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/rng/device.hpp` is deprecated, please use `oneapi/math/rng/device.hpp` instead")
 
-#endif // _MKL_RNG_SYCL_DEVICE_HPP__
+#include "oneapi/math/rng/device.hpp"
+
+#include "oneapi/mkl/namespace_alias.hpp"
+
+#endif // ONEMATH_MKL_RNG_DEVICE_HPP
diff --git a/include/oneapi/mkl/sparse_blas.hpp b/include/oneapi/mkl/sparse_blas.hpp
index 8fb86f244..1ec890462 100644
--- a/include/oneapi/mkl/sparse_blas.hpp
+++ b/include/oneapi/mkl/sparse_blas.hpp
@@ -1,43 +1,31 @@
-/***************************************************************************
-*  Copyright (C) Codeplay Software Limited
-*  Licensed under the Apache License, Version 2.0 (the "License");
-*  you may not use this file except in compliance with the License.
-*  You may obtain a copy of the License at
+/*******************************************************************************
+* Copyright 2024 Intel Corporation
 *
-*      http://www.apache.org/licenses/LICENSE-2.0
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
 *
-*  For your convenience, a copy of the License has been included in this
-*  repository.
+* http://www.apache.org/licenses/LICENSE-2.0
 *
-*  Unless required by applicable law or agreed to in writing, software
-*  distributed under the License is distributed on an "AS IS" BASIS,
-*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-*  See the License for the specific language governing permissions and
-*  limitations under the License.
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
 *
-**************************************************************************/
-
-#ifndef _ONEMKL_SPARSE_BLAS_HPP_
-#define _ONEMKL_SPARSE_BLAS_HPP_
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
 
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
+#ifndef ONEMATH_MKL_SPARSE_BLAS_HPP
+#define ONEMATH_MKL_SPARSE_BLAS_HPP
 
-#include "oneapi/mkl/detail/config.hpp"
+// Deprecated header is planned to be removed late 2025.
+#pragma message( \
+    "Header `oneapi/mkl/sparse_blas.hpp` is deprecated, please use `oneapi/math/sparse_blas.hpp` instead")
 
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
-#include "sparse_blas/detail/mklcpu/sparse_blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
-#include "sparse_blas/detail/mklgpu/sparse_blas_ct.hpp"
-#endif
-#ifdef ONEMKL_ENABLE_CUSPARSE_BACKEND
-#include "sparse_blas/detail/cusparse/sparse_blas_ct.hpp"
-#endif
+#include "oneapi/math/sparse_blas.hpp"
 
-#include "sparse_blas/detail/sparse_blas_rt.hpp"
+#include "namespace_alias.hpp"
 
-#endif // _ONEMKL_SPARSE_BLAS_HPP_
+#endif // ONEMATH_MKL_SPARSE_BLAS_HPP
diff --git a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx
deleted file mode 100644
index e25fff46e..000000000
--- a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx
+++ /dev/null
@@ -1,217 +0,0 @@
-/***************************************************************************
-*  Copyright(C) Codeplay Software Limited
-*  Licensed under the Apache License, Version 2.0(the "License");
-*  you may not use this file except in compliance with the License.
-*  You may obtain a copy of the License at
-*
-*      http://www.apache.org/licenses/LICENSE-2.0
-*
-*  For your convenience, a copy of the License has been included in this
-*  repository.
-*
-*  Unless required by applicable law or agreed to in writing, software
-*  distributed under the License is distributed on an "AS IS" BASIS,
-*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-*  See the License for the specific language governing permissions and
-*  limitations under the License.
-*
-**************************************************************************/
-
-// This file is meant to be included in each backend onemkl_sparse_blas_BACKEND.hpp files.
-// It is used to exports each symbol to the onemkl_sparse_blas_BACKEND library.
-
-// Dense vector
-template <typename dataType>
-ONEMKL_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle,
-                                     std::int64_t size, sycl::buffer<dataType, 1> val);
-template <typename dataType>
-ONEMKL_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle,
-                                     std::int64_t size, dataType* val);
-
-template <typename dataType>
-ONEMKL_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle,
-                                         std::int64_t size, sycl::buffer<dataType, 1> val);
-template <typename dataType>
-ONEMKL_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle,
-                                         std::int64_t size, dataType* val);
-
-ONEMKL_EXPORT sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle,
-                                               const std::vector<sycl::event>& dependencies = {});
-
-// Dense matrix
-template <typename dataType>
-ONEMKL_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle,
-                                     std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                                     layout dense_layout, sycl::buffer<dataType, 1> val);
-template <typename dataType>
-ONEMKL_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle,
-                                     std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                                     layout dense_layout, dataType* val);
-
-template <typename dataType>
-ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
-                                         std::int64_t num_rows, std::int64_t num_cols,
-                                         std::int64_t ld, layout dense_layout,
-                                         sycl::buffer<dataType, 1> val);
-template <typename dataType>
-ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
-                                         std::int64_t num_rows, std::int64_t num_cols,
-                                         std::int64_t ld, layout dense_layout, dataType* val);
-
-ONEMKL_EXPORT sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle,
-                                               const std::vector<sycl::event>& dependencies = {});
-
-// COO matrix
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
-                                   std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                                   index_base index, sycl::buffer<indexType, 1> row_ind,
-                                   sycl::buffer<indexType, 1> col_ind,
-                                   sycl::buffer<dataType, 1> val);
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
-                                   std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                                   index_base index, indexType* row_ind, indexType* col_ind,
-                                   dataType* val);
-
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
-                                       std::int64_t num_rows, std::int64_t num_cols,
-                                       std::int64_t nnz, index_base index,
-                                       sycl::buffer<indexType, 1> row_ind,
-                                       sycl::buffer<indexType, 1> col_ind,
-                                       sycl::buffer<dataType, 1> val);
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
-                                       std::int64_t num_rows, std::int64_t num_cols,
-                                       std::int64_t nnz, index_base index, indexType* row_ind,
-                                       indexType* col_ind, dataType* val);
-
-// CSR matrix
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
-                                   std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                                   index_base index, sycl::buffer<indexType, 1> row_ptr,
-                                   sycl::buffer<indexType, 1> col_ind,
-                                   sycl::buffer<dataType, 1> val);
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle,
-                                   std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                                   index_base index, indexType* row_ptr, indexType* col_ind,
-                                   dataType* val);
-
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
-                                       std::int64_t num_rows, std::int64_t num_cols,
-                                       std::int64_t nnz, index_base index,
-                                       sycl::buffer<indexType, 1> row_ptr,
-                                       sycl::buffer<indexType, 1> col_ind,
-                                       sycl::buffer<dataType, 1> val);
-template <typename dataType, typename indexType>
-ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle,
-                                       std::int64_t num_rows, std::int64_t num_cols,
-                                       std::int64_t nnz, index_base index, indexType* row_ptr,
-                                       indexType* col_ind, dataType* val);
-
-// Common sparse matrix functions
-ONEMKL_EXPORT sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle,
-                                                const std::vector<sycl::event>& dependencies = {});
-
-bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property);
-
-// SPMM
-ONEMKL_EXPORT void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr);
-
-ONEMKL_EXPORT sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
-                                             const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                    oneapi::mkl::transpose opB, const void* alpha,
-                                    matrix_view A_view, matrix_handle_t A_handle,
-                                    dense_matrix_handle_t B_handle, const void* beta,
-                                    dense_matrix_handle_t C_handle, spmm_alg alg,
-                                    spmm_descr_t spmm_descr, std::size_t& temp_buffer_size);
-
-ONEMKL_EXPORT void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                 oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
-                                 matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                                 const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
-                                 spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace);
-
-ONEMKL_EXPORT sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                        oneapi::mkl::transpose opB, const void* alpha,
-                                        matrix_view A_view, matrix_handle_t A_handle,
-                                        dense_matrix_handle_t B_handle, const void* beta,
-                                        dense_matrix_handle_t C_handle, spmm_alg alg,
-                                        spmm_descr_t spmm_descr, void* workspace,
-                                        const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA,
-                               oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
-                               matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                               const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
-                               spmm_descr_t spmm_descr,
-                               const std::vector<sycl::event>& dependencies = {});
-
-// SPMV
-ONEMKL_EXPORT void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr);
-
-ONEMKL_EXPORT sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
-                                             const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                    const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                                    dense_vector_handle_t x_handle, const void* beta,
-                                    dense_vector_handle_t y_handle, spmv_alg alg,
-                                    spmv_descr_t spmv_descr, std::size_t& temp_buffer_size);
-
-ONEMKL_EXPORT void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                                 matrix_view A_view, matrix_handle_t A_handle,
-                                 dense_vector_handle_t x_handle, const void* beta,
-                                 dense_vector_handle_t y_handle, spmv_alg alg,
-                                 spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace);
-
-ONEMKL_EXPORT sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                        const void* alpha, matrix_view A_view,
-                                        matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                                        const void* beta, dense_vector_handle_t y_handle,
-                                        spmv_alg alg, spmv_descr_t spmv_descr, void* workspace,
-                                        const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                               matrix_view A_view, matrix_handle_t A_handle,
-                               dense_vector_handle_t x_handle, const void* beta,
-                               dense_vector_handle_t y_handle, spmv_alg alg,
-                               spmv_descr_t spmv_descr,
-                               const std::vector<sycl::event>& dependencies = {});
-
-// SPSV
-ONEMKL_EXPORT void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr);
-
-ONEMKL_EXPORT sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
-                                             const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                    const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                                    dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                                    spsv_alg alg, spsv_descr_t spsv_descr,
-                                    std::size_t& temp_buffer_size);
-
-ONEMKL_EXPORT void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                                 matrix_view A_view, matrix_handle_t A_handle,
-                                 dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                                 spsv_alg alg, spsv_descr_t spsv_descr,
-                                 sycl::buffer<std::uint8_t, 1> workspace);
-
-ONEMKL_EXPORT sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                        const void* alpha, matrix_view A_view,
-                                        matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                                        dense_vector_handle_t y_handle, spsv_alg alg,
-                                        spsv_descr_t spsv_descr, void* workspace,
-                                        const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_EXPORT sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                               matrix_view A_view, matrix_handle_t A_handle,
-                               dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                               spsv_alg alg, spsv_descr_t spsv_descr,
-                               const std::vector<sycl::event>& dependencies = {});
diff --git a/scripts/generate_backend_api.py b/scripts/generate_backend_api.py
index cc76a20b7..3fde4a8b3 100755
--- a/scripts/generate_backend_api.py
+++ b/scripts/generate_backend_api.py
@@ -40,10 +40,10 @@ def usage(err = None):
 
 Example:
 The command below will generate:
-"onemkl_blas_mklgpu.hpp" header with declaration of all backend library APIs.
-API from backend library will be called from "oneapi::mkl::mklgpu::blas" namespace.
+"onemath_blas_mklgpu.hpp" header with declaration of all backend library APIs.
+API from backend library will be called from "oneapi::math::mklgpu::blas" namespace.
 
-{script}  include/oneapi/mkl/blas.hpp include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp oneapi::mkl::mklgpu::blas
+{script}  include/oneapi/math/blas.hpp include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp oneapi::math::mklgpu::blas
 '''.format(script = argv[0]))
 
 if len(argv) < 3:
@@ -95,7 +95,7 @@ def print_declaration(func_list):
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 """.format(in_filename=in_filename))
 
 for nmsp in namespace_list:
diff --git a/scripts/generate_cmake.py b/scripts/generate_cmake.py
index 36cac3669..506cf3529 100644
--- a/scripts/generate_cmake.py
+++ b/scripts/generate_cmake.py
@@ -38,7 +38,7 @@ def usage(err = None):
 
 Example:
 
-    {script}  include/oneapi/mkl/blas/detail/mklgpu mklgpu
+    {script}  include/oneapi/math/blas/detail/mklgpu mklgpu
 '''.format(script = argv[0]))
 
 if len(argv) <= 2:
@@ -72,7 +72,7 @@ def usage(err = None):
 # generated file
 #
 
-set(LIB_NAME onemkl_blas_{libname})
+set(LIB_NAME onemath_blas_{libname})
 set(LIB_OBJ ${{LIB_NAME}}_obj)
 
 # Add third-party library
@@ -84,7 +84,7 @@ def usage(err = None):
 
 for f in file_list:
     if re.search('_dyn.c', f):
-        out_file.write("""  $<$<BOOL:${{ONEMKL_BUILD_SHARED_LIBS}}>: {filename}>
+        out_file.write("""  $<$<BOOL:${{ONEMATH_BUILD_SHARED_LIBS}}>: {filename}>
 """.format(filename=f))
     else:
         out_file.write("""  {filename}
@@ -99,7 +99,7 @@ def usage(err = None):
 )
 
 target_link_libraries(${{LIB_OBJ}}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     # Add third party library to link with here
 )
 
@@ -118,8 +118,8 @@ def usage(err = None):
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${{LIB_NAME}}>)
 
 # Add the library to install package
-install(TARGETS ${{LIB_OBJ}} EXPORT oneMKLTargets)
-install(TARGETS ${{LIB_NAME}} EXPORT oneMKLTargets
+install(TARGETS ${{LIB_OBJ}} EXPORT oneMathTargets)
+install(TARGETS ${{LIB_NAME}} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/scripts/generate_ct_instant.py b/scripts/generate_ct_instant.py
index a30f53f21..7ed4b12b9 100755
--- a/scripts/generate_ct_instant.py
+++ b/scripts/generate_ct_instant.py
@@ -41,9 +41,9 @@ def usage(err = None):
 Example:
 The command below will generate:
 "blas_ct.hpp" header with compile-time BLAS API based on "blas_ct_templates.hpp" for "mklgpu" backend.
-API from the backend library will be called from "oneapi::mkl::mklgpu::blas" namespace.
+API from the backend library will be called from "oneapi::math::mklgpu::blas" namespace.
 
-{script}  include/oneapi/mkl/blas/detail/blas_ct_templates.hpp include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hpp include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp mklgpu oneapi::mkl::mklgpu::blas
+{script}  include/oneapi/math/blas/detail/blas_ct_templates.hpp include/oneapi/math/blas/detail/mklgpu/blas_ct.hpp include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp mklgpu oneapi::math::mklgpu::blas
 '''.format(script = argv[0]))
 
 if len(argv) < 6:
@@ -101,8 +101,8 @@ def print_funcs(func_list):
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backends.hpp"
 #include "{internal_api}"
 #include "{ct_teplates}"
 
diff --git a/scripts/generate_ct_templates.py b/scripts/generate_ct_templates.py
index 4827c43e1..815991866 100755
--- a/scripts/generate_ct_templates.py
+++ b/scripts/generate_ct_templates.py
@@ -42,7 +42,7 @@ def usage(err = None):
 The command below will generate:
 "blas_ct_templates.hpp" header with general templates for compile-time BLAS API based on "blas.hpp".
 
-    {script}  include/oneapi/mkl/blas.hpp include/oneapi/mkl/blas/detail/blas_ct_templates.hpp
+    {script}  include/oneapi/math/blas.hpp include/oneapi/math/blas/detail/blas_ct_templates.hpp
 '''.format(script = argv[0]))
 
 if len(argv) < 2:
@@ -65,7 +65,7 @@ def print_funcs(func_list):
     code=""
     for data in func_list:
         code +="""
-template <oneapi::mkl::backend backend> static inline {ret_type} {name}{par_str};
+template <oneapi::math::backend backend> static inline {ret_type} {name}{par_str};
 """.format(**data)
     return code
 
@@ -90,8 +90,8 @@ def print_funcs(func_list):
 #include <complex>
 #include <cstdint>
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backends.hpp"
 
 """.format(in_header=in_filename))
 
diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py
index c4e18d648..110768f4e 100755
--- a/scripts/generate_wrappers.py
+++ b/scripts/generate_wrappers.py
@@ -40,7 +40,7 @@ def usage(err = None):
 
 Example:
 
-    {script}  include/oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp src/blas/function_table.hpp src/blas/backend/mklgpu/wrappers.cpp mklgpu
+    {script}  include/oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp src/blas/function_table.hpp src/blas/backend/mklgpu/wrappers.cpp mklgpu
 '''.format(script = argv[0]))
 
 if len(argv) <= 4:
@@ -94,7 +94,7 @@ def print_funcs(func_list):
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 #include "{header}"
 
@@ -144,7 +144,7 @@ def print_funcs(func_list):
 
 #define WRAPPER_VERSION 1
 
-extern "C" function_table_t mkl_blas_table = {{
+extern "C" function_table_t onemath_blas_table = {{
     WRAPPER_VERSION,
 """.format(table=in_table.strip('src/'), header=in_filename.strip('include/')))
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c363d8a8d..9ff24a721 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,24 +17,59 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-# Define common build flags for oneMKL libraries
-set(ONEMKL_BUILD_COPT "")
+# Define common build flags for oneMath libraries
+set(ONEMATH_BUILD_COPT "")
 if(WIN32 AND BUILD_SHARED_LIBS)
-  list(APPEND ONEMKL_BUILD_COPT "-Donemkl_EXPORTS")
+  list(APPEND ONEMATH_BUILD_COPT "-Donemath_EXPORTS")
 endif()
 
 # store path to CMAKE_CURRENT_BINARY_DIR to use it later (makes FetchContent_Declare workable)
-set(ONEMKL_GENERATED_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR})
+set(ONEMATH_GENERATED_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR})
 
 
-set(ONEMKL_INTERFACE_INCLUDE_DIRS
+set(ONEMATH_INCLUDE_DIRS
   $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
   $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
   $<INSTALL_INTERFACE:include>
 )
 
+# Create a CMake library as a deprecated alias of target_name
+# The deprecated target name is based on the target_name with "onemath" replaced by "onemkl"
+# Deprecated targets are planned to be removed late 2025
+function(add_deprecated_library target_name)
+  string(REPLACE "onemath" "onemkl" deprecated_name "${target_name}")
+  if("${target_name}" STREQUAL "${deprecated_name}")
+    message(FATAL_ERROR "Internal error: add_deprecated_library was not able to generate a deprecated target name")
+  endif()
+  # The INTERFACE IMPORTED target works like an alias which can have different properties
+  add_library(${deprecated_name} INTERFACE IMPORTED)
+  target_link_libraries(${deprecated_name} INTERFACE ${target_name})
+  set_target_properties(${deprecated_name} PROPERTIES DEPRECATION "${deprecated_name} target is deprecated, please use ${target_name} instead")
+endfunction()
+
+
+# Due to using the same directory structure and file name for different headers
+# in this library and in the Intel(R) oneAPI Math Kernel Library, the compiler
+# may not include the expected headers.
+# Intel oneMKL include path is set as system include meaning it is always
+# searched last no matter the order of the include flag in the command
+# line argument.
+# Using the -iquote flag is not supported on Windows.
+# To avoid confusion the include paths are set up with a different "root" folder
+# i.e.:
+#   * the oneMath include path is `${PROJECT_SOURCE_DIR}/include` and its
+#     deprecated headers can be included using `#include "oneapi/mkl/mkl.hpp"`
+#     for instance.
+#   * the Intel oneMKL include path is `${MKL_INCLUDE}/oneapi` and its
+#     headers can be included using `#include "mkl/mkl.hpp"` for instance.
+function(target_add_intel_onemkl_include target_name)
+  target_include_directories(${target_name}
+    PRIVATE ${MKL_INCLUDE}/oneapi
+  )
+endfunction()
+
 # Build loader and backends for each domain
-add_custom_target(onemkl_backend_libs)
+add_custom_target(onemath_backend_libs)
 foreach(domain ${TARGET_DOMAINS})
   add_subdirectory(${domain})
 endforeach()
@@ -45,72 +80,74 @@ function(generate_header_file)
   # Following if-conditions allow to decouple cmake configuration variables with
   # the corresponding generated macro. This is done to be conformant with ES.33
   # C++ Core Guidelines
-  set(ONEMKL_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
-  set(ONEMKL_ENABLE_MKLCPU_BACKEND ${ENABLE_MKLCPU_BACKEND})
-  set(ONEMKL_ENABLE_MKLGPU_BACKEND ${ENABLE_MKLGPU_BACKEND})
-  set(ONEMKL_ENABLE_CUBLAS_BACKEND ${ENABLE_CUBLAS_BACKEND})
-  set(ONEMKL_ENABLE_ROCBLAS_BACKEND ${ENABLE_ROCBLAS_BACKEND})
-  set(ONEMKL_ENABLE_NETLIB_BACKEND ${ENABLE_NETLIB_BACKEND})
-  set(ONEMKL_ENABLE_PORTBLAS_BACKEND ${ENABLE_PORTBLAS_BACKEND})
-  set(ONEMKL_ENABLE_CURAND_BACKEND ${ENABLE_CURAND_BACKEND})
-  set(ONEMKL_ENABLE_ROCRAND_BACKEND ${ENABLE_ROCRAND_BACKEND})
-  set(ONEMKL_ENABLE_CUSOLVER_BACKEND ${ENABLE_CUSOLVER_BACKEND})
-  set(ONEMKL_ENABLE_ROCSOLVER_BACKEND ${ENABLE_ROCSOLVER_BACKEND})
-  set(ONEMKL_ENABLE_CUFFT_BACKEND ${ENABLE_CUFFT_BACKEND})
-  set(ONEMKL_ENABLE_ROCFFT_BACKEND ${ENABLE_ROCFFT_BACKEND})
-  set(ONEMKL_ENABLE_PORTFFT_BACKEND ${ENABLE_PORTFFT_BACKEND})
-  set(ONEMKL_ENABLE_CUSPARSE_BACKEND ${ENABLE_CUSPARSE_BACKEND})
-
-  configure_file(config.hpp.in "${CMAKE_CURRENT_BINARY_DIR}/oneapi/mkl/config.hpp.configured")
+  set(ONEMATH_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
+  set(ONEMATH_ENABLE_MKLCPU_BACKEND ${ENABLE_MKLCPU_BACKEND})
+  set(ONEMATH_ENABLE_MKLGPU_BACKEND ${ENABLE_MKLGPU_BACKEND})
+  set(ONEMATH_ENABLE_CUBLAS_BACKEND ${ENABLE_CUBLAS_BACKEND})
+  set(ONEMATH_ENABLE_ROCBLAS_BACKEND ${ENABLE_ROCBLAS_BACKEND})
+  set(ONEMATH_ENABLE_NETLIB_BACKEND ${ENABLE_NETLIB_BACKEND})
+  set(ONEMATH_ENABLE_PORTBLAS_BACKEND ${ENABLE_PORTBLAS_BACKEND})
+  set(ONEMATH_ENABLE_CURAND_BACKEND ${ENABLE_CURAND_BACKEND})
+  set(ONEMATH_ENABLE_ROCRAND_BACKEND ${ENABLE_ROCRAND_BACKEND})
+  set(ONEMATH_ENABLE_CUSOLVER_BACKEND ${ENABLE_CUSOLVER_BACKEND})
+  set(ONEMATH_ENABLE_ROCSOLVER_BACKEND ${ENABLE_ROCSOLVER_BACKEND})
+  set(ONEMATH_ENABLE_CUFFT_BACKEND ${ENABLE_CUFFT_BACKEND})
+  set(ONEMATH_ENABLE_ROCFFT_BACKEND ${ENABLE_ROCFFT_BACKEND})
+  set(ONEMATH_ENABLE_PORTFFT_BACKEND ${ENABLE_PORTFFT_BACKEND})
+  set(ONEMATH_ENABLE_CUSPARSE_BACKEND ${ENABLE_CUSPARSE_BACKEND})
+
+  configure_file(config.hpp.in "${CMAKE_CURRENT_BINARY_DIR}/oneapi/math/config.hpp.configured")
   file(GENERATE
-    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/oneapi/mkl/detail/config.hpp"
-    INPUT "${CMAKE_CURRENT_BINARY_DIR}/oneapi/mkl/config.hpp.configured"
+    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/oneapi/math/detail/config.hpp"
+    INPUT "${CMAKE_CURRENT_BINARY_DIR}/oneapi/math/config.hpp.configured"
   )
-  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/oneapi/mkl/detail/config.hpp"
-    DESTINATION include/oneapi/mkl/detail
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/oneapi/math/detail/config.hpp"
+    DESTINATION include/oneapi/math/detail
     COMPONENT Devel
   )
 endfunction()
 
 generate_header_file()
 
-# Add recipe for onemkl loader library
+# Add recipe for onemath loader library
 if(BUILD_SHARED_LIBS)
-  add_library(onemkl SHARED)
+  add_library(onemath SHARED)
 
   # The loader library depends on all the backend libraries as it uses
   # dlopen to load them at runtime.
   # Use add_dependencies to ensure that all the backend libraries are
   # (re-)built when compiling the loader or runtime binaries.
-  add_dependencies(onemkl onemkl_backend_libs)
+  add_dependencies(onemath onemath_backend_libs)
 
-  target_include_directories(onemkl
-    PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  target_include_directories(onemath
+    PUBLIC ${ONEMATH_INCLUDE_DIRS}
   )
-  set_target_properties(onemkl PROPERTIES
+  set_target_properties(onemath PROPERTIES
     SOVERSION ${PROJECT_VERSION_MAJOR}
   )
-  # w/a for setting oneMKL Interfaces installed headers as -I instead of -isystem for cmake >= 3.25 for workable find_package(MKL) combination
+  # w/a for setting oneMath installed headers as -I instead of -isystem for cmake >= 3.25 for workable find_package(MKL) combination
   if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.25.0")
-    set_target_properties(onemkl PROPERTIES EXPORT_NO_SYSTEM true)
+    set_target_properties(onemath PROPERTIES EXPORT_NO_SYSTEM true)
   endif()
 
   # Build dispatcher library
-  set (ONEMKL_LIBS ${TARGET_DOMAINS})
-  list(TRANSFORM ONEMKL_LIBS PREPEND onemkl_)
-  target_link_libraries(onemkl PUBLIC ${ONEMKL_LIBS})
-  set_target_properties(onemkl PROPERTIES
+  set (ONEMATH_LIBS ${TARGET_DOMAINS})
+  list(TRANSFORM ONEMATH_LIBS PREPEND onemath_)
+  target_link_libraries(onemath PUBLIC ${ONEMATH_LIBS})
+  set_target_properties(onemath PROPERTIES
       INSTALL_RPATH "\$ORIGIN"
       BUILD_WITH_INSTALL_RPATH TRUE
   )
 
   # Add the library to install package
-  foreach(domain_lib ${ONEMKL_LIBS})
-    install(TARGETS ${domain_lib} EXPORT oneMKLTargets)
+  foreach(domain_lib ${ONEMATH_LIBS})
+    install(TARGETS ${domain_lib} EXPORT oneMathTargets)
   endforeach()
-  install(TARGETS onemkl EXPORT oneMKLTargets
+  install(TARGETS onemath EXPORT oneMathTargets
     RUNTIME DESTINATION bin
     ARCHIVE DESTINATION lib
     LIBRARY DESTINATION lib
   )
+
+  add_deprecated_library(onemath)
 endif()
diff --git a/src/blas/CMakeLists.txt b/src/blas/CMakeLists.txt
index 1edf2e445..2d0d796fa 100644
--- a/src/blas/CMakeLists.txt
+++ b/src/blas/CMakeLists.txt
@@ -22,26 +22,27 @@ add_subdirectory(backends)
 
 # Recipe for BLAS loader object
 if(BUILD_SHARED_LIBS)
-add_library(onemkl_blas OBJECT)
-target_sources(onemkl_blas PRIVATE blas_loader.cpp)
-target_include_directories(onemkl_blas
+add_library(onemath_blas OBJECT)
+add_deprecated_library(onemath_blas)
+target_sources(onemath_blas PRIVATE blas_loader.cpp)
+target_include_directories(onemath_blas
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
-          $<TARGET_FILE_DIR:onemkl>
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
+          $<TARGET_FILE_DIR:onemath>
 )
 
-target_compile_options(onemkl_blas PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(onemath_blas PRIVATE ${ONEMATH_BUILD_COPT})
 
-set_target_properties(onemkl_blas PROPERTIES
+set_target_properties(onemath_blas PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
-  add_sycl_to_target(TARGET onemkl_blas SOURCES blas_loader.cpp)
+  add_sycl_to_target(TARGET onemath_blas SOURCES blas_loader.cpp)
 else()
-  target_link_libraries(onemkl_blas PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(onemath_blas PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
 endif()
diff --git a/src/blas/backends/CMakeLists.txt b/src/blas/backends/CMakeLists.txt
index 351f4b0e5..ac7595fc6 100644
--- a/src/blas/backends/CMakeLists.txt
+++ b/src/blas/backends/CMakeLists.txt
@@ -17,8 +17,8 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-add_custom_target(onemkl_backend_libs_blas)
-add_dependencies(onemkl_backend_libs onemkl_backend_libs_blas)
+add_custom_target(onemath_backend_libs_blas)
+add_dependencies(onemath_backend_libs onemath_backend_libs_blas)
 
 if(ENABLE_MKLCPU_BACKEND)
   add_subdirectory(mklcpu)
diff --git a/src/blas/backends/backend_wrappers.cxx b/src/blas/backends/backend_wrappers.cxx
index 62f6ced13..b0c8c9c70 100644
--- a/src/blas/backends/backend_wrappers.cxx
+++ b/src/blas/backends/backend_wrappers.cxx
@@ -18,494 +18,494 @@
 *******************************************************************************/
 
 // clang-format off
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dotc,
-oneapi::mkl::blas::BACKEND::MAJOR::dotc,
-oneapi::mkl::blas::BACKEND::MAJOR::dotu,
-oneapi::mkl::blas::BACKEND::MAJOR::dotu,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotm,
-oneapi::mkl::blas::BACKEND::MAJOR::rotm,
-oneapi::mkl::blas::BACKEND::MAJOR::rotmg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotmg,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::sdsdot,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::ger,
-oneapi::mkl::blas::BACKEND::MAJOR::ger,
-oneapi::mkl::blas::BACKEND::MAJOR::gerc,
-oneapi::mkl::blas::BACKEND::MAJOR::gerc,
-oneapi::mkl::blas::BACKEND::MAJOR::geru,
-oneapi::mkl::blas::BACKEND::MAJOR::geru,
-oneapi::mkl::blas::BACKEND::MAJOR::hbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hemv,
-oneapi::mkl::blas::BACKEND::MAJOR::hemv,
-oneapi::mkl::blas::BACKEND::MAJOR::her,
-oneapi::mkl::blas::BACKEND::MAJOR::her,
-oneapi::mkl::blas::BACKEND::MAJOR::her2,
-oneapi::mkl::blas::BACKEND::MAJOR::her2,
-oneapi::mkl::blas::BACKEND::MAJOR::hpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr2,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr2,
-oneapi::mkl::blas::BACKEND::MAJOR::sbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::sbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spr,
-oneapi::mkl::blas::BACKEND::MAJOR::spr,
-oneapi::mkl::blas::BACKEND::MAJOR::spr2,
-oneapi::mkl::blas::BACKEND::MAJOR::spr2,
-oneapi::mkl::blas::BACKEND::MAJOR::symv,
-oneapi::mkl::blas::BACKEND::MAJOR::symv,
-oneapi::mkl::blas::BACKEND::MAJOR::syr,
-oneapi::mkl::blas::BACKEND::MAJOR::syr,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::hemm,
-oneapi::mkl::blas::BACKEND::MAJOR::hemm,
-oneapi::mkl::blas::BACKEND::MAJOR::herk,
-oneapi::mkl::blas::BACKEND::MAJOR::herk,
-oneapi::mkl::blas::BACKEND::MAJOR::her2k,
-oneapi::mkl::blas::BACKEND::MAJOR::her2k,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,    
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,    
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::asum,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::axpby,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::copy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dot,
-oneapi::mkl::blas::BACKEND::MAJOR::dotc,
-oneapi::mkl::blas::BACKEND::MAJOR::dotc,
-oneapi::mkl::blas::BACKEND::MAJOR::dotu,
-oneapi::mkl::blas::BACKEND::MAJOR::dotu,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamin,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::iamax,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::nrm2,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rot,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotm,
-oneapi::mkl::blas::BACKEND::MAJOR::rotm,
-oneapi::mkl::blas::BACKEND::MAJOR::rotmg,
-oneapi::mkl::blas::BACKEND::MAJOR::rotmg,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::scal,
-oneapi::mkl::blas::BACKEND::MAJOR::sdsdot,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::swap,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemv_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::dgmm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::ger,
-oneapi::mkl::blas::BACKEND::MAJOR::ger,
-oneapi::mkl::blas::BACKEND::MAJOR::gerc,
-oneapi::mkl::blas::BACKEND::MAJOR::gerc,
-oneapi::mkl::blas::BACKEND::MAJOR::geru,
-oneapi::mkl::blas::BACKEND::MAJOR::geru,
-oneapi::mkl::blas::BACKEND::MAJOR::hbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hemv,
-oneapi::mkl::blas::BACKEND::MAJOR::hemv,
-oneapi::mkl::blas::BACKEND::MAJOR::her,
-oneapi::mkl::blas::BACKEND::MAJOR::her,
-oneapi::mkl::blas::BACKEND::MAJOR::her2,
-oneapi::mkl::blas::BACKEND::MAJOR::her2,
-oneapi::mkl::blas::BACKEND::MAJOR::hpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr2,
-oneapi::mkl::blas::BACKEND::MAJOR::hpr2,
-oneapi::mkl::blas::BACKEND::MAJOR::sbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::sbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spmv,
-oneapi::mkl::blas::BACKEND::MAJOR::spr,
-oneapi::mkl::blas::BACKEND::MAJOR::spr,
-oneapi::mkl::blas::BACKEND::MAJOR::spr2,
-oneapi::mkl::blas::BACKEND::MAJOR::spr2,
-oneapi::mkl::blas::BACKEND::MAJOR::symv,
-oneapi::mkl::blas::BACKEND::MAJOR::symv,
-oneapi::mkl::blas::BACKEND::MAJOR::syr,
-oneapi::mkl::blas::BACKEND::MAJOR::syr,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tbsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpmv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::tpsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trmv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::trsv,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm,
-oneapi::mkl::blas::BACKEND::MAJOR::hemm,
-oneapi::mkl::blas::BACKEND::MAJOR::hemm,
-oneapi::mkl::blas::BACKEND::MAJOR::herk,
-oneapi::mkl::blas::BACKEND::MAJOR::herk,
-oneapi::mkl::blas::BACKEND::MAJOR::her2k,
-oneapi::mkl::blas::BACKEND::MAJOR::her2k,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::symm,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syrk_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::syr2k,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trmm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::trsm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemmt,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::gemm_bias,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy2,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,
-oneapi::mkl::blas::BACKEND::MAJOR::omatadd,    
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::omatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
-oneapi::mkl::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dotc,
+oneapi::math::blas::BACKEND::MAJOR::dotc,
+oneapi::math::blas::BACKEND::MAJOR::dotu,
+oneapi::math::blas::BACKEND::MAJOR::dotu,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotm,
+oneapi::math::blas::BACKEND::MAJOR::rotm,
+oneapi::math::blas::BACKEND::MAJOR::rotmg,
+oneapi::math::blas::BACKEND::MAJOR::rotmg,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::sdsdot,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::ger,
+oneapi::math::blas::BACKEND::MAJOR::ger,
+oneapi::math::blas::BACKEND::MAJOR::gerc,
+oneapi::math::blas::BACKEND::MAJOR::gerc,
+oneapi::math::blas::BACKEND::MAJOR::geru,
+oneapi::math::blas::BACKEND::MAJOR::geru,
+oneapi::math::blas::BACKEND::MAJOR::hbmv,
+oneapi::math::blas::BACKEND::MAJOR::hbmv,
+oneapi::math::blas::BACKEND::MAJOR::hemv,
+oneapi::math::blas::BACKEND::MAJOR::hemv,
+oneapi::math::blas::BACKEND::MAJOR::her,
+oneapi::math::blas::BACKEND::MAJOR::her,
+oneapi::math::blas::BACKEND::MAJOR::her2,
+oneapi::math::blas::BACKEND::MAJOR::her2,
+oneapi::math::blas::BACKEND::MAJOR::hpmv,
+oneapi::math::blas::BACKEND::MAJOR::hpmv,
+oneapi::math::blas::BACKEND::MAJOR::hpr,
+oneapi::math::blas::BACKEND::MAJOR::hpr,
+oneapi::math::blas::BACKEND::MAJOR::hpr2,
+oneapi::math::blas::BACKEND::MAJOR::hpr2,
+oneapi::math::blas::BACKEND::MAJOR::sbmv,
+oneapi::math::blas::BACKEND::MAJOR::sbmv,
+oneapi::math::blas::BACKEND::MAJOR::spmv,
+oneapi::math::blas::BACKEND::MAJOR::spmv,
+oneapi::math::blas::BACKEND::MAJOR::spr,
+oneapi::math::blas::BACKEND::MAJOR::spr,
+oneapi::math::blas::BACKEND::MAJOR::spr2,
+oneapi::math::blas::BACKEND::MAJOR::spr2,
+oneapi::math::blas::BACKEND::MAJOR::symv,
+oneapi::math::blas::BACKEND::MAJOR::symv,
+oneapi::math::blas::BACKEND::MAJOR::syr,
+oneapi::math::blas::BACKEND::MAJOR::syr,
+oneapi::math::blas::BACKEND::MAJOR::syr2,
+oneapi::math::blas::BACKEND::MAJOR::syr2,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::hemm,
+oneapi::math::blas::BACKEND::MAJOR::hemm,
+oneapi::math::blas::BACKEND::MAJOR::herk,
+oneapi::math::blas::BACKEND::MAJOR::herk,
+oneapi::math::blas::BACKEND::MAJOR::her2k,
+oneapi::math::blas::BACKEND::MAJOR::her2k,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,    
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,    
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::asum,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpy_batch,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::axpby,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::copy_batch,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dot,
+oneapi::math::blas::BACKEND::MAJOR::dotc,
+oneapi::math::blas::BACKEND::MAJOR::dotc,
+oneapi::math::blas::BACKEND::MAJOR::dotu,
+oneapi::math::blas::BACKEND::MAJOR::dotu,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamin,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::iamax,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::nrm2,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rot,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotg,
+oneapi::math::blas::BACKEND::MAJOR::rotm,
+oneapi::math::blas::BACKEND::MAJOR::rotm,
+oneapi::math::blas::BACKEND::MAJOR::rotmg,
+oneapi::math::blas::BACKEND::MAJOR::rotmg,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::scal,
+oneapi::math::blas::BACKEND::MAJOR::sdsdot,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::swap,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gbmv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemv_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::dgmm_batch,
+oneapi::math::blas::BACKEND::MAJOR::ger,
+oneapi::math::blas::BACKEND::MAJOR::ger,
+oneapi::math::blas::BACKEND::MAJOR::gerc,
+oneapi::math::blas::BACKEND::MAJOR::gerc,
+oneapi::math::blas::BACKEND::MAJOR::geru,
+oneapi::math::blas::BACKEND::MAJOR::geru,
+oneapi::math::blas::BACKEND::MAJOR::hbmv,
+oneapi::math::blas::BACKEND::MAJOR::hbmv,
+oneapi::math::blas::BACKEND::MAJOR::hemv,
+oneapi::math::blas::BACKEND::MAJOR::hemv,
+oneapi::math::blas::BACKEND::MAJOR::her,
+oneapi::math::blas::BACKEND::MAJOR::her,
+oneapi::math::blas::BACKEND::MAJOR::her2,
+oneapi::math::blas::BACKEND::MAJOR::her2,
+oneapi::math::blas::BACKEND::MAJOR::hpmv,
+oneapi::math::blas::BACKEND::MAJOR::hpmv,
+oneapi::math::blas::BACKEND::MAJOR::hpr,
+oneapi::math::blas::BACKEND::MAJOR::hpr,
+oneapi::math::blas::BACKEND::MAJOR::hpr2,
+oneapi::math::blas::BACKEND::MAJOR::hpr2,
+oneapi::math::blas::BACKEND::MAJOR::sbmv,
+oneapi::math::blas::BACKEND::MAJOR::sbmv,
+oneapi::math::blas::BACKEND::MAJOR::spmv,
+oneapi::math::blas::BACKEND::MAJOR::spmv,
+oneapi::math::blas::BACKEND::MAJOR::spr,
+oneapi::math::blas::BACKEND::MAJOR::spr,
+oneapi::math::blas::BACKEND::MAJOR::spr2,
+oneapi::math::blas::BACKEND::MAJOR::spr2,
+oneapi::math::blas::BACKEND::MAJOR::symv,
+oneapi::math::blas::BACKEND::MAJOR::symv,
+oneapi::math::blas::BACKEND::MAJOR::syr,
+oneapi::math::blas::BACKEND::MAJOR::syr,
+oneapi::math::blas::BACKEND::MAJOR::syr2,
+oneapi::math::blas::BACKEND::MAJOR::syr2,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbmv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tbsv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpmv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::tpsv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trmv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::trsv,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::gemm,
+oneapi::math::blas::BACKEND::MAJOR::hemm,
+oneapi::math::blas::BACKEND::MAJOR::hemm,
+oneapi::math::blas::BACKEND::MAJOR::herk,
+oneapi::math::blas::BACKEND::MAJOR::herk,
+oneapi::math::blas::BACKEND::MAJOR::her2k,
+oneapi::math::blas::BACKEND::MAJOR::her2k,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::symm,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syrk_batch,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::syr2k,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trmm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::trsm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemm_batch,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemmt,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::gemm_bias,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatadd_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy2,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,
+oneapi::math::blas::BACKEND::MAJOR::omatadd,    
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::omatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
+oneapi::math::blas::BACKEND::MAJOR::imatcopy_batch,
     // clang-format on
diff --git a/src/blas/backends/cublas/CMakeLists.txt b/src/blas/backends/cublas/CMakeLists.txt
index b64e7c37d..1cda3352f 100644
--- a/src/blas/backends/cublas/CMakeLists.txt
+++ b/src/blas/backends/cublas/CMakeLists.txt
@@ -17,7 +17,7 @@
 #
 #=========================================================================
 
-set(LIB_NAME onemkl_blas_cublas)
+set(LIB_NAME onemath_blas_cublas)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(cuBLAS REQUIRED)
 set(SOURCES cublas_level1.cpp 
@@ -25,28 +25,29 @@ set(SOURCES cublas_level1.cpp
                 cublas_level3.cpp 
                 cublas_batch.cpp 
                 cublas_extensions.cpp 
-                $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:cublas_scope_handle.cpp >
-                $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},hipsycl>:cublas_scope_handle_hipsycl.cpp >
+                $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},dpc++>:cublas_scope_handle.cpp >
+                $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},hipsycl>:cublas_scope_handle_hipsycl.cpp >
                 $<$<BOOL:${BUILD_SHARED_LIBS}>: cublas_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src/include
           ${PROJECT_SOURCE_DIR}/src
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
-if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
-    target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+    target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
           -fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
-    target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+    target_link_options(ONEMATH::SYCL::SYCL INTERFACE
           -fsycl-targets=nvptx64-nvidia-cuda)
 endif()
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ONEMKL::cuBLAS::cuBLAS)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL ONEMATH::cuBLAS::cuBLAS)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_11)
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON)
@@ -64,8 +65,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/cublas/cublas_batch.cpp b/src/blas/backends/cublas/cublas_batch.cpp
index 4a88ea56f..822817d6f 100644
--- a/src/blas/backends/cublas/cublas_batch.cpp
+++ b/src/blas/backends/cublas/cublas_batch.cpp
@@ -18,11 +18,11 @@
 **************************************************************************/
 #include "cublas_helper.hpp"
 #include "cublas_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 namespace column_major {
@@ -155,13 +155,13 @@ inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose tran
     cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT;
     queue.submit([&](sycl::handler& cgh) {
         if (!verify_support<sycl::half, Ta, Tb, Tc, Ts>(queue, sycl::aspect::fp16)) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "blas", "sycl::half", "half is not supported by the device or the sycl compiler");
         }
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuTypeA*>(a_acc);
             auto b_ = sc.get_mem<cuTypeB*>(b_acc);
@@ -514,7 +514,7 @@ inline sycl::event gemv_batch(const char* func_name, Func func, sycl::queue& que
     }
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             cublasStatus_t err;
@@ -625,14 +625,14 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose tra
     cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT;
     auto done = queue.submit([&](sycl::handler& cgh) {
         if (!verify_support<sycl::half, Ta, Tb, Tc, Ts>(queue, sycl::aspect::fp16)) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "blas", "sycl::half", "half is not supported by the device or the sycl compiler");
         }
         int64_t num_events = dependencies.size();
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             cublasStatus_t err;
 #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND
@@ -711,14 +711,14 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, tr
     cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT;
     auto done = queue.submit([&](sycl::handler& cgh) {
         if (!verify_support<sycl::half, Ta, Tb, Tc, Ts>(queue, sycl::aspect::fp16)) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "blas", "sycl::half", "half is not supported by the device or the sycl compiler");
         }
         int64_t num_events = dependencies.size();
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             cublasStatus_t err;
@@ -832,7 +832,7 @@ inline sycl::event trsm_batch(const char* func_name, Func func, sycl::queue& que
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             cublasStatus_t err;
@@ -1888,5 +1888,5 @@ sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int
 } // namespace row_major
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_extensions.cpp b/src/blas/backends/cublas/cublas_extensions.cpp
index c80392aa6..ab1157135 100644
--- a/src/blas/backends/cublas/cublas_extensions.cpp
+++ b/src/blas/backends/cublas/cublas_extensions.cpp
@@ -18,11 +18,11 @@
 **************************************************************************/
 #include "cublas_helper.hpp"
 #include "cublas_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 namespace column_major {
@@ -94,9 +94,9 @@ void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose tr
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? m : n);
-        const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? n : m);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? m : n);
+        const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? n : m);
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -172,7 +172,7 @@ void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose tra
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -272,9 +272,9 @@ sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, trans
     overflow_check(m, n, lda, ldb);
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? m : n);
-        const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? n : m);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? m : n);
+        const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? n : m);
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -356,7 +356,7 @@ inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue,
     overflow_check(m, n, lda, ldb, ldc);
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -457,9 +457,9 @@ void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose tr
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? n : m);
-        const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? m : n);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? n : m);
+        const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? m : n);
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -535,7 +535,7 @@ void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose tra
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -635,9 +635,9 @@ sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, trans
     overflow_check(m, n, lda, ldb);
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? n : m);
-        const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? m : n);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? n : m);
+        const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? m : n);
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -719,7 +719,7 @@ inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue,
     overflow_check(m, n, lda, ldb, ldc);
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -752,5 +752,5 @@ OMATADD_LAUNCHER_USM(std::complex<double>, cublasZgeam)
 } // namespace row_major
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_handle.hpp b/src/blas/backends/cublas/cublas_handle.hpp
index 8b77282df..674f02a12 100644
--- a/src/blas/backends/cublas/cublas_handle.hpp
+++ b/src/blas/backends/cublas/cublas_handle.hpp
@@ -22,7 +22,7 @@
 #include "cublas_helper.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -49,7 +49,7 @@ struct cublas_handle {
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // CUBLAS_HANDLE_HPP
diff --git a/src/blas/backends/cublas/cublas_helper.hpp b/src/blas/backends/cublas/cublas_helper.hpp
index 58df5c006..a67d89126 100644
--- a/src/blas/backends/cublas/cublas_helper.hpp
+++ b/src/blas/backends/cublas/cublas_helper.hpp
@@ -33,17 +33,17 @@
 #include <cuda_fp16.h>
 #include <complex>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "runtime_support_helper.hpp"
 #include "dtype_string.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
 // The static assert to make sure that all index types used in
-// src/oneMKL/backend/cublas/blas.hpp interface are int64_t
+// src/oneMath/backend/cublas/blas.hpp interface are int64_t
 template <typename... Next>
 struct is_int64 : std::false_type {};
 
@@ -75,7 +75,7 @@ struct Overflow<Index, T...> {
 
 template <typename Index, typename... Next>
 void overflow_check(Index index, Next... indices) {
-    static_assert(is_int64<Index, Next...>::value, "oneMKL index type must be 64 bit integer.");
+    static_assert(is_int64<Index, Next...>::value, "oneMath index type must be 64 bit integer.");
     Overflow<Index, Next...>::check(index, indices...);
 }
 
@@ -225,35 +225,35 @@ inline void cublas_native_named_func(const char* func_name, Func func, cublasSta
 #endif
 };
 
-inline cublasOperation_t get_cublas_operation(oneapi::mkl::transpose trn) {
+inline cublasOperation_t get_cublas_operation(oneapi::math::transpose trn) {
     switch (trn) {
-        case oneapi::mkl::transpose::nontrans: return CUBLAS_OP_N;
-        case oneapi::mkl::transpose::trans: return CUBLAS_OP_T;
-        case oneapi::mkl::transpose::conjtrans: return CUBLAS_OP_C;
+        case oneapi::math::transpose::nontrans: return CUBLAS_OP_N;
+        case oneapi::math::transpose::trans: return CUBLAS_OP_T;
+        case oneapi::math::transpose::conjtrans: return CUBLAS_OP_C;
         default: throw "Wrong transpose Operation.";
     }
 }
 
-inline cublasFillMode_t get_cublas_fill_mode(oneapi::mkl::uplo ul) {
+inline cublasFillMode_t get_cublas_fill_mode(oneapi::math::uplo ul) {
     switch (ul) {
-        case oneapi::mkl::uplo::upper: return CUBLAS_FILL_MODE_UPPER;
-        case oneapi::mkl::uplo::lower: return CUBLAS_FILL_MODE_LOWER;
+        case oneapi::math::uplo::upper: return CUBLAS_FILL_MODE_UPPER;
+        case oneapi::math::uplo::lower: return CUBLAS_FILL_MODE_LOWER;
         default: throw "Wrong fill mode.";
     }
 }
 
-inline cublasDiagType_t get_cublas_diag_type(oneapi::mkl::diag un) {
+inline cublasDiagType_t get_cublas_diag_type(oneapi::math::diag un) {
     switch (un) {
-        case oneapi::mkl::diag::unit: return CUBLAS_DIAG_UNIT;
-        case oneapi::mkl::diag::nonunit: return CUBLAS_DIAG_NON_UNIT;
+        case oneapi::math::diag::unit: return CUBLAS_DIAG_UNIT;
+        case oneapi::math::diag::nonunit: return CUBLAS_DIAG_NON_UNIT;
         default: throw "Wrong diag type.";
     }
 }
 
-inline cublasSideMode_t get_cublas_side_mode(oneapi::mkl::side lr) {
+inline cublasSideMode_t get_cublas_side_mode(oneapi::math::side lr) {
     switch (lr) {
-        case oneapi::mkl::side::left: return CUBLAS_SIDE_LEFT;
-        case oneapi::mkl::side::right: return CUBLAS_SIDE_RIGHT;
+        case oneapi::math::side::left: return CUBLAS_SIDE_LEFT;
+        case oneapi::math::side::right: return CUBLAS_SIDE_RIGHT;
         default: throw "Wrong side mode.";
     }
 }
@@ -329,6 +329,6 @@ struct CudaEquivalentType<std::complex<double>> {
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif // _CUBLAS_HELPER_HPP_
diff --git a/src/blas/backends/cublas/cublas_level1.cpp b/src/blas/backends/cublas/cublas_level1.cpp
index 115712e80..830a1b1e0 100644
--- a/src/blas/backends/cublas/cublas_level1.cpp
+++ b/src/blas/backends/cublas/cublas_level1.cpp
@@ -19,11 +19,11 @@
 #include "cublas_helper.hpp"
 #include "cublas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 namespace column_major {
@@ -41,7 +41,7 @@ inline void asum(const char* func_name, Func func, sycl::queue& queue, int64_t n
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -81,7 +81,7 @@ inline void scal(const char* func_name, Func func, sycl::queue& queue, int64_t n
     overflow_check(n, incx);
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = sc.get_mem<cuDataType2*>(x_acc);
             cublasStatus_t err;
@@ -112,7 +112,7 @@ inline void axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
             auto y_ = sc.get_mem<cuDataType*>(y_acc);
@@ -167,7 +167,7 @@ inline void rotg(const char* func_name, Func func, sycl::queue& queue, sycl::buf
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto s_acc = s.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -211,7 +211,7 @@ inline void rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
         auto param_acc = param.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -250,7 +250,7 @@ inline void copy(const char* func_name, Func func, sycl::queue& queue, int64_t n
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
             auto y_ = sc.get_mem<cuDataType*>(y_acc);
@@ -282,7 +282,7 @@ inline void dot(const char* func_name, Func func, sycl::queue& queue, int64_t n,
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -327,7 +327,7 @@ inline void rot(const char* func_name, Func func, sycl::queue& queue, int64_t n,
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -364,7 +364,7 @@ void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer<float, 1>& x,
         auto x_acc = x.get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -404,7 +404,7 @@ inline void rotmg(const char* func_name, Func func, sycl::queue& queue, sycl::bu
         auto x1_acc = x1.template get_access<sycl::access::mode::read_write>(cgh);
         auto y1_acc = y1_buff.template get_access<sycl::access::mode::read>(cgh);
         auto param_acc = param.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -453,7 +453,7 @@ inline void iamax(const char* func_name, Func func, sycl::queue& queue, int64_t
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto int_res_acc = int_res_buff.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -501,7 +501,7 @@ inline void swap(const char* func_name, Func func, sycl::queue& queue, int64_t n
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
             auto y_ = sc.get_mem<cuDataType*>(y_acc);
@@ -539,7 +539,7 @@ inline void iamin(const char* func_name, Func func, sycl::queue& queue, int64_t
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto int_res_acc = int_res_buff.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -589,7 +589,7 @@ inline void nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST
             // when the data is on buffer, it must be set to
@@ -639,7 +639,7 @@ inline sycl::event asum(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType1*>(x);
             auto res_ = reinterpret_cast<cuDataType2*>(result);
@@ -679,7 +679,7 @@ inline sycl::event scal(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<cuDataType2*>(x);
             cublasStatus_t err;
@@ -715,7 +715,7 @@ inline sycl::event axpy(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
             auto y_ = reinterpret_cast<cuDataType*>(y);
@@ -778,7 +778,7 @@ inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1
             sycl::get_pointer_type(b, ctx) == sycl::usm::alloc::unknown ||
             sycl::get_pointer_type(c, ctx) == sycl::usm::alloc::unknown ||
             sycl::get_pointer_type(s, ctx) == sycl::usm::alloc::unknown) {
-            throw oneapi::mkl::exception(
+            throw oneapi::math::exception(
                 "blas", "rotg",
                 "If any pointer is only device accessible, all must be device accessible");
         }
@@ -788,7 +788,7 @@ inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType1*>(a);
             auto b_ = reinterpret_cast<cuDataType1*>(b);
@@ -830,7 +830,7 @@ inline sycl::event rotm(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<cuDataType*>(x);
             auto y_ = reinterpret_cast<cuDataType*>(y);
@@ -864,7 +864,7 @@ inline sycl::event copy(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
             auto y_ = reinterpret_cast<cuDataType*>(y);
@@ -900,7 +900,7 @@ inline sycl::event dot(const char* func_name, Func func, sycl::queue& queue, int
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
             auto y_ = reinterpret_cast<const cuDataType*>(y);
@@ -946,7 +946,7 @@ inline sycl::event rot(const char* func_name, Func func, sycl::queue& queue, int
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<cuDataType1*>(x);
             auto y_ = reinterpret_cast<cuDataType1*>(y);
@@ -984,7 +984,7 @@ sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int6
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const float*>(x);
             auto y_ = reinterpret_cast<const float*>(y);
@@ -1034,7 +1034,7 @@ inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T
         if (sycl::get_pointer_type(d1, ctx) == sycl::usm::alloc::unknown ||
             sycl::get_pointer_type(d2, ctx) == sycl::usm::alloc::unknown ||
             sycl::get_pointer_type(x1, ctx) == sycl::usm::alloc::unknown) {
-            throw oneapi::mkl::exception(
+            throw oneapi::math::exception(
                 "blas", "rotmg",
                 "If any pointer is only device accessible, all must be device accessible");
         }
@@ -1049,7 +1049,7 @@ inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto d1_ = reinterpret_cast<cuDataType*>(d1);
             auto d2_ = reinterpret_cast<cuDataType*>(d2);
@@ -1111,7 +1111,7 @@ inline sycl::event iamax(const char* func_name, Func func, sycl::queue& queue, i
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
             if (result_on_device) {
@@ -1163,7 +1163,7 @@ inline sycl::event swap(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<cuDataType*>(x);
             auto y_ = reinterpret_cast<cuDataType*>(y);
@@ -1212,7 +1212,7 @@ inline sycl::event iamin(const char* func_name, Func func, sycl::queue& queue, i
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
             if (result_on_device) {
@@ -1268,7 +1268,7 @@ inline sycl::event nrm2(const char* func_name, Func func, sycl::queue& queue, in
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = reinterpret_cast<const cuDataType1*>(x);
             auto res_ = reinterpret_cast<cuDataType2*>(result);
@@ -1849,5 +1849,5 @@ NRM2_LAUNCHER_USM(std::complex<double>, double, cublasDznrm2)
 } // namespace row_major
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_level2.cpp b/src/blas/backends/cublas/cublas_level2.cpp
index 904f8d7e6..87adc7fab 100644
--- a/src/blas/backends/cublas/cublas_level2.cpp
+++ b/src/blas/backends/cublas/cublas_level2.cpp
@@ -19,11 +19,11 @@
 #include "cublas_helper.hpp"
 #include "cublas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 namespace column_major {
@@ -40,7 +40,7 @@ inline void gemv(const char* func_name, Func func, sycl::queue& queue, transpose
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -77,7 +77,7 @@ inline void gbmv(const char* func_name, Func func, sycl::queue& queue, transpose
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -114,7 +114,7 @@ inline void ger(const char* func_name, Func func, sycl::queue& queue, int64_t m,
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -151,7 +151,7 @@ inline void hbmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -186,7 +186,7 @@ inline void hemv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -222,7 +222,7 @@ inline void her(const char* func_name, Func func, sycl::queue& queue, uplo upper
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -256,7 +256,7 @@ inline void her2(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -292,7 +292,7 @@ inline void hpmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -328,7 +328,7 @@ inline void hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -361,7 +361,7 @@ inline void hpr2(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -396,7 +396,7 @@ inline void sbmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -432,7 +432,7 @@ inline void symv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -466,7 +466,7 @@ inline void syr(const char* func_name, Func func, sycl::queue& queue, uplo upper
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -501,7 +501,7 @@ inline void syr2(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -540,7 +540,7 @@ inline void spmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -574,7 +574,7 @@ inline void spr(const char* func_name, Func func, sycl::queue& queue, uplo upper
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -607,7 +607,7 @@ inline void spr2(const char* func_name, Func func, sycl::queue& queue, uplo uppe
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -641,7 +641,7 @@ inline void tbmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -677,7 +677,7 @@ inline void tbsv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -713,7 +713,7 @@ inline void tpmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -748,7 +748,7 @@ inline void tpsv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -783,7 +783,7 @@ inline void trmv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -818,7 +818,7 @@ inline void trsv(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto x_ = sc.get_mem<cuDataType*>(x_acc);
@@ -858,7 +858,7 @@ inline sycl::event gemv(const char* func_name, Func func, sycl::queue& queue, tr
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -898,7 +898,7 @@ inline sycl::event gbmv(const char* func_name, Func func, sycl::queue& queue, tr
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -938,7 +938,7 @@ inline sycl::event ger(const char* func_name, Func func, sycl::queue& queue, int
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -979,7 +979,7 @@ inline sycl::event hbmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1016,7 +1016,7 @@ inline sycl::event hemv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1055,7 +1055,7 @@ inline sycl::event her(const char* func_name, Func func, sycl::queue& queue, upl
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1092,7 +1092,7 @@ inline sycl::event her2(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1130,7 +1130,7 @@ inline sycl::event hpmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1169,7 +1169,7 @@ inline sycl::event hpr(const char* func_name, Func func, sycl::queue& queue, upl
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1206,7 +1206,7 @@ inline sycl::event hpr2(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1245,7 +1245,7 @@ inline sycl::event sbmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1283,7 +1283,7 @@ inline sycl::event symv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1321,7 +1321,7 @@ inline sycl::event syr(const char* func_name, Func func, sycl::queue& queue, upl
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1360,7 +1360,7 @@ inline sycl::event syr2(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1401,7 +1401,7 @@ inline sycl::event spmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1439,7 +1439,7 @@ inline sycl::event spr(const char* func_name, Func func, sycl::queue& queue, upl
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1475,7 +1475,7 @@ inline sycl::event spr2(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto x_ = reinterpret_cast<const cuDataType*>(x);
@@ -1514,7 +1514,7 @@ inline sycl::event tbmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -1554,7 +1554,7 @@ inline sycl::event tbsv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -1593,7 +1593,7 @@ inline sycl::event tpmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -1632,7 +1632,7 @@ inline sycl::event tpsv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -1671,7 +1671,7 @@ inline sycl::event trmv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -1710,7 +1710,7 @@ inline sycl::event trsv(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto x_ = reinterpret_cast<cuDataType*>(x);
@@ -2698,5 +2698,5 @@ TRSV_LAUNCHER_USM(std::complex<double>, cublasZtrsv)
 } // namespace row_major
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_level3.cpp b/src/blas/backends/cublas/cublas_level3.cpp
index 66aad9c6b..58d1f4273 100644
--- a/src/blas/backends/cublas/cublas_level3.cpp
+++ b/src/blas/backends/cublas/cublas_level3.cpp
@@ -19,11 +19,11 @@
 #include "cublas_helper.hpp"
 #include "cublas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 namespace column_major {
@@ -41,7 +41,7 @@ inline void gemm(const char* func_name, Func func, sycl::queue& queue, transpose
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -82,13 +82,13 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que
     overflow_check(m, n, k, lda, ldb, ldc);
     queue.submit([&](sycl::handler& cgh) {
         if (!verify_support<sycl::half, T_A, T_B, T_C>(queue, sycl::aspect::fp16)) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "blas", "sycl::half", "half is not supported by the device or the sycl compiler");
         }
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto b_ = sc.get_mem<cuDataType_B*>(b_acc);
@@ -140,7 +140,7 @@ inline void symm(const char* func_name, Func func, sycl::queue& queue, side left
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -179,7 +179,7 @@ inline void hemm(const char* func_name, Func func, sycl::queue& queue, side left
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -213,7 +213,7 @@ inline void syrk(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto c_ = sc.get_mem<cuDataType*>(c_acc);
@@ -252,7 +252,7 @@ inline void herk(const char* func_name, Func func, sycl::queue& queue, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto c_ = sc.get_mem<cuDataType*>(c_acc);
@@ -289,7 +289,7 @@ inline void syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upp
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -329,7 +329,7 @@ inline void her2k(const char* func_name, Func func, sycl::queue& queue, uplo upp
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -370,7 +370,7 @@ inline void trmm(const char* func_name, Func func, sycl::queue& queue, side left
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -406,7 +406,7 @@ inline void trsm(const char* func_name, Func func, sycl::queue& queue, side left
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -447,7 +447,7 @@ inline sycl::event gemm(const char* func_name, Func func, sycl::queue& queue, tr
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -493,7 +493,7 @@ inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType_A*>(a);
             auto b_ = reinterpret_cast<const cuDataType_B*>(b);
@@ -549,7 +549,7 @@ inline sycl::event symm(const char* func_name, Func func, sycl::queue& queue, si
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -591,7 +591,7 @@ inline sycl::event hemm(const char* func_name, Func func, sycl::queue& queue, si
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -629,7 +629,7 @@ inline sycl::event syrk(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto c_ = reinterpret_cast<cuDataType*>(c);
@@ -671,7 +671,7 @@ inline sycl::event herk(const char* func_name, Func func, sycl::queue& queue, up
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto c_ = reinterpret_cast<cuDataType*>(c);
@@ -711,7 +711,7 @@ inline sycl::event syr2k(const char* func_name, Func func, sycl::queue& queue, u
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -755,7 +755,7 @@ inline sycl::event her2k(const char* func_name, Func func, sycl::queue& queue, u
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<const cuDataType*>(b);
@@ -800,7 +800,7 @@ inline sycl::event trmm(const char* func_name, Func func, sycl::queue& queue, si
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -840,7 +840,7 @@ inline sycl::event trsm(const char* func_name, Func func, sycl::queue& queue, si
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
+        onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<const cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -1346,5 +1346,5 @@ TRSM_LAUNCHER_USM(std::complex<double>, cublasZtrsm)
 } // namespace row_major
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_scope_handle.cpp b/src/blas/backends/cublas/cublas_scope_handle.cpp
index 812d89d31..07d260038 100644
--- a/src/blas/backends/cublas/cublas_scope_handle.cpp
+++ b/src/blas/backends/cublas/cublas_scope_handle.cpp
@@ -19,7 +19,7 @@
 #include "cublas_scope_handle.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -69,5 +69,5 @@ sycl::context CublasScopedContextHandler::get_context(const sycl::queue& queue)
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_scope_handle.hpp b/src/blas/backends/cublas/cublas_scope_handle.hpp
index 2f6027478..6633dcecc 100644
--- a/src/blas/backends/cublas/cublas_scope_handle.hpp
+++ b/src/blas/backends/cublas/cublas_scope_handle.hpp
@@ -30,7 +30,7 @@
 #include "cublas_handle.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -92,6 +92,6 @@ class CublasScopedContextHandler {
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //_CUBLAS_SCOPED_HANDLE_HPP_
diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp
index 8822151dd..d2561a14b 100644
--- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp
+++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp
@@ -20,7 +20,7 @@
 #include "cublas_handle.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -60,5 +60,5 @@ CUstream CublasScopedContextHandler::get_stream(const sycl::queue& queue) {
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp
index 84b28e0fd..82075007d 100644
--- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp
+++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp
@@ -28,7 +28,7 @@
 #include "cublas_helper.hpp"
 #include "cublas_handle.hpp"
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -78,6 +78,6 @@ class CublasScopedContextHandler {
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //CUBLAS_SCOPED_HANDLE_HIPSYCL_HPP
diff --git a/src/blas/backends/cublas/cublas_task.hpp b/src/blas/backends/cublas/cublas_task.hpp
index ae95e6eb1..a1106936f 100644
--- a/src/blas/backends/cublas/cublas_task.hpp
+++ b/src/blas/backends/cublas/cublas_task.hpp
@@ -19,8 +19,8 @@
 *
 **************************************************************************/
 
-#ifndef _MKL_BLAS_CUBLAS_TASK_HPP_
-#define _MKL_BLAS_CUBLAS_TASK_HPP_
+#ifndef ONEMATH_BLAS_CUBLAS_TASK_HPP_
+#define ONEMATH_BLAS_CUBLAS_TASK_HPP_
 #include <cublas_v2.h>
 #include <cuda.h>
 #include <complex>
@@ -29,7 +29,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #ifndef __HIPSYCL__
 #include "cublas_scope_handle.hpp"
 #else
@@ -40,7 +40,7 @@ using interop_handler = sycl::interop_handle;
 }
 #endif
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace cublas {
 
@@ -66,12 +66,12 @@ static inline void host_task_internal(H& cgh, sycl::queue queue, F f) {
 }
 #endif
 template <typename H, typename F>
-static inline void onemkl_cublas_host_task(H& cgh, sycl::queue queue, F f) {
+static inline void onemath_cublas_host_task(H& cgh, sycl::queue queue, F f) {
     (void)host_task_internal(cgh, queue, f);
 }
 
 } // namespace cublas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
-#endif // _MKL_BLAS_CUBLAS_TASK_HPP_
+#endif // ONEMATH_BLAS_CUBLAS_TASK_HPP_
diff --git a/src/blas/backends/cublas/cublas_wrappers.cpp b/src/blas/backends/cublas/cublas_wrappers.cpp
index ee5c7239f..993abf84a 100644
--- a/src/blas/backends/cublas/cublas_wrappers.cpp
+++ b/src/blas/backends/cublas/cublas_wrappers.cpp
@@ -17,990 +17,990 @@
 *
 **************************************************************************/
 #include "blas/function_table.hpp"
-#include "oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hpp"
+#include "oneapi/math/blas/detail/cublas/onemath_blas_cublas.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" blas_function_table_t mkl_blas_table = {
+extern "C" blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dotc,
-    oneapi::mkl::blas::cublas::column_major::dotc,
-    oneapi::mkl::blas::cublas::column_major::dotu,
-    oneapi::mkl::blas::cublas::column_major::dotu,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotm,
-    oneapi::mkl::blas::cublas::column_major::rotm,
-    oneapi::mkl::blas::cublas::column_major::rotmg,
-    oneapi::mkl::blas::cublas::column_major::rotmg,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::sdsdot,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::ger,
-    oneapi::mkl::blas::cublas::column_major::ger,
-    oneapi::mkl::blas::cublas::column_major::gerc,
-    oneapi::mkl::blas::cublas::column_major::gerc,
-    oneapi::mkl::blas::cublas::column_major::geru,
-    oneapi::mkl::blas::cublas::column_major::geru,
-    oneapi::mkl::blas::cublas::column_major::hbmv,
-    oneapi::mkl::blas::cublas::column_major::hbmv,
-    oneapi::mkl::blas::cublas::column_major::hemv,
-    oneapi::mkl::blas::cublas::column_major::hemv,
-    oneapi::mkl::blas::cublas::column_major::her,
-    oneapi::mkl::blas::cublas::column_major::her,
-    oneapi::mkl::blas::cublas::column_major::her2,
-    oneapi::mkl::blas::cublas::column_major::her2,
-    oneapi::mkl::blas::cublas::column_major::hpmv,
-    oneapi::mkl::blas::cublas::column_major::hpmv,
-    oneapi::mkl::blas::cublas::column_major::hpr,
-    oneapi::mkl::blas::cublas::column_major::hpr,
-    oneapi::mkl::blas::cublas::column_major::hpr2,
-    oneapi::mkl::blas::cublas::column_major::hpr2,
-    oneapi::mkl::blas::cublas::column_major::sbmv,
-    oneapi::mkl::blas::cublas::column_major::sbmv,
-    oneapi::mkl::blas::cublas::column_major::spmv,
-    oneapi::mkl::blas::cublas::column_major::spmv,
-    oneapi::mkl::blas::cublas::column_major::spr,
-    oneapi::mkl::blas::cublas::column_major::spr,
-    oneapi::mkl::blas::cublas::column_major::spr2,
-    oneapi::mkl::blas::cublas::column_major::spr2,
-    oneapi::mkl::blas::cublas::column_major::symv,
-    oneapi::mkl::blas::cublas::column_major::symv,
-    oneapi::mkl::blas::cublas::column_major::syr,
-    oneapi::mkl::blas::cublas::column_major::syr,
-    oneapi::mkl::blas::cublas::column_major::syr2,
-    oneapi::mkl::blas::cublas::column_major::syr2,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::hemm,
-    oneapi::mkl::blas::cublas::column_major::hemm,
-    oneapi::mkl::blas::cublas::column_major::herk,
-    oneapi::mkl::blas::cublas::column_major::herk,
-    oneapi::mkl::blas::cublas::column_major::her2k,
-    oneapi::mkl::blas::cublas::column_major::her2k,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::asum,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpy_batch,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::axpby,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::copy_batch,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dot,
-    oneapi::mkl::blas::cublas::column_major::dotc,
-    oneapi::mkl::blas::cublas::column_major::dotc,
-    oneapi::mkl::blas::cublas::column_major::dotu,
-    oneapi::mkl::blas::cublas::column_major::dotu,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamin,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::iamax,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::nrm2,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rot,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotg,
-    oneapi::mkl::blas::cublas::column_major::rotm,
-    oneapi::mkl::blas::cublas::column_major::rotm,
-    oneapi::mkl::blas::cublas::column_major::rotmg,
-    oneapi::mkl::blas::cublas::column_major::rotmg,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::scal,
-    oneapi::mkl::blas::cublas::column_major::sdsdot,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::swap,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gbmv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::gemv_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::column_major::ger,
-    oneapi::mkl::blas::cublas::column_major::ger,
-    oneapi::mkl::blas::cublas::column_major::gerc,
-    oneapi::mkl::blas::cublas::column_major::gerc,
-    oneapi::mkl::blas::cublas::column_major::geru,
-    oneapi::mkl::blas::cublas::column_major::geru,
-    oneapi::mkl::blas::cublas::column_major::hbmv,
-    oneapi::mkl::blas::cublas::column_major::hbmv,
-    oneapi::mkl::blas::cublas::column_major::hemv,
-    oneapi::mkl::blas::cublas::column_major::hemv,
-    oneapi::mkl::blas::cublas::column_major::her,
-    oneapi::mkl::blas::cublas::column_major::her,
-    oneapi::mkl::blas::cublas::column_major::her2,
-    oneapi::mkl::blas::cublas::column_major::her2,
-    oneapi::mkl::blas::cublas::column_major::hpmv,
-    oneapi::mkl::blas::cublas::column_major::hpmv,
-    oneapi::mkl::blas::cublas::column_major::hpr,
-    oneapi::mkl::blas::cublas::column_major::hpr,
-    oneapi::mkl::blas::cublas::column_major::hpr2,
-    oneapi::mkl::blas::cublas::column_major::hpr2,
-    oneapi::mkl::blas::cublas::column_major::sbmv,
-    oneapi::mkl::blas::cublas::column_major::sbmv,
-    oneapi::mkl::blas::cublas::column_major::spmv,
-    oneapi::mkl::blas::cublas::column_major::spmv,
-    oneapi::mkl::blas::cublas::column_major::spr,
-    oneapi::mkl::blas::cublas::column_major::spr,
-    oneapi::mkl::blas::cublas::column_major::spr2,
-    oneapi::mkl::blas::cublas::column_major::spr2,
-    oneapi::mkl::blas::cublas::column_major::symv,
-    oneapi::mkl::blas::cublas::column_major::symv,
-    oneapi::mkl::blas::cublas::column_major::syr,
-    oneapi::mkl::blas::cublas::column_major::syr,
-    oneapi::mkl::blas::cublas::column_major::syr2,
-    oneapi::mkl::blas::cublas::column_major::syr2,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbmv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tbsv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpmv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::tpsv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trmv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::trsv,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::gemm,
-    oneapi::mkl::blas::cublas::column_major::hemm,
-    oneapi::mkl::blas::cublas::column_major::hemm,
-    oneapi::mkl::blas::cublas::column_major::herk,
-    oneapi::mkl::blas::cublas::column_major::herk,
-    oneapi::mkl::blas::cublas::column_major::her2k,
-    oneapi::mkl::blas::cublas::column_major::her2k,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::symm,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syrk_batch,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::syr2k,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trmm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::trsm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemm_batch,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemmt,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::gemm_bias,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::omatcopy2,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::imatcopy,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatadd,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dotc,
-    oneapi::mkl::blas::cublas::row_major::dotc,
-    oneapi::mkl::blas::cublas::row_major::dotu,
-    oneapi::mkl::blas::cublas::row_major::dotu,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotm,
-    oneapi::mkl::blas::cublas::row_major::rotm,
-    oneapi::mkl::blas::cublas::row_major::rotmg,
-    oneapi::mkl::blas::cublas::row_major::rotmg,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::sdsdot,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::ger,
-    oneapi::mkl::blas::cublas::row_major::ger,
-    oneapi::mkl::blas::cublas::row_major::gerc,
-    oneapi::mkl::blas::cublas::row_major::gerc,
-    oneapi::mkl::blas::cublas::row_major::geru,
-    oneapi::mkl::blas::cublas::row_major::geru,
-    oneapi::mkl::blas::cublas::row_major::hbmv,
-    oneapi::mkl::blas::cublas::row_major::hbmv,
-    oneapi::mkl::blas::cublas::row_major::hemv,
-    oneapi::mkl::blas::cublas::row_major::hemv,
-    oneapi::mkl::blas::cublas::row_major::her,
-    oneapi::mkl::blas::cublas::row_major::her,
-    oneapi::mkl::blas::cublas::row_major::her2,
-    oneapi::mkl::blas::cublas::row_major::her2,
-    oneapi::mkl::blas::cublas::row_major::hpmv,
-    oneapi::mkl::blas::cublas::row_major::hpmv,
-    oneapi::mkl::blas::cublas::row_major::hpr,
-    oneapi::mkl::blas::cublas::row_major::hpr,
-    oneapi::mkl::blas::cublas::row_major::hpr2,
-    oneapi::mkl::blas::cublas::row_major::hpr2,
-    oneapi::mkl::blas::cublas::row_major::sbmv,
-    oneapi::mkl::blas::cublas::row_major::sbmv,
-    oneapi::mkl::blas::cublas::row_major::spmv,
-    oneapi::mkl::blas::cublas::row_major::spmv,
-    oneapi::mkl::blas::cublas::row_major::spr,
-    oneapi::mkl::blas::cublas::row_major::spr,
-    oneapi::mkl::blas::cublas::row_major::spr2,
-    oneapi::mkl::blas::cublas::row_major::spr2,
-    oneapi::mkl::blas::cublas::row_major::symv,
-    oneapi::mkl::blas::cublas::row_major::symv,
-    oneapi::mkl::blas::cublas::row_major::syr,
-    oneapi::mkl::blas::cublas::row_major::syr,
-    oneapi::mkl::blas::cublas::row_major::syr2,
-    oneapi::mkl::blas::cublas::row_major::syr2,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::hemm,
-    oneapi::mkl::blas::cublas::row_major::hemm,
-    oneapi::mkl::blas::cublas::row_major::herk,
-    oneapi::mkl::blas::cublas::row_major::herk,
-    oneapi::mkl::blas::cublas::row_major::her2k,
-    oneapi::mkl::blas::cublas::row_major::her2k,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::asum,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpy_batch,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::axpby,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::copy_batch,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dot,
-    oneapi::mkl::blas::cublas::row_major::dotc,
-    oneapi::mkl::blas::cublas::row_major::dotc,
-    oneapi::mkl::blas::cublas::row_major::dotu,
-    oneapi::mkl::blas::cublas::row_major::dotu,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamin,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::iamax,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::nrm2,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rot,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotg,
-    oneapi::mkl::blas::cublas::row_major::rotm,
-    oneapi::mkl::blas::cublas::row_major::rotm,
-    oneapi::mkl::blas::cublas::row_major::rotmg,
-    oneapi::mkl::blas::cublas::row_major::rotmg,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::scal,
-    oneapi::mkl::blas::cublas::row_major::sdsdot,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::swap,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gbmv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::gemv_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::dgmm_batch,
-    oneapi::mkl::blas::cublas::row_major::ger,
-    oneapi::mkl::blas::cublas::row_major::ger,
-    oneapi::mkl::blas::cublas::row_major::gerc,
-    oneapi::mkl::blas::cublas::row_major::gerc,
-    oneapi::mkl::blas::cublas::row_major::geru,
-    oneapi::mkl::blas::cublas::row_major::geru,
-    oneapi::mkl::blas::cublas::row_major::hbmv,
-    oneapi::mkl::blas::cublas::row_major::hbmv,
-    oneapi::mkl::blas::cublas::row_major::hemv,
-    oneapi::mkl::blas::cublas::row_major::hemv,
-    oneapi::mkl::blas::cublas::row_major::her,
-    oneapi::mkl::blas::cublas::row_major::her,
-    oneapi::mkl::blas::cublas::row_major::her2,
-    oneapi::mkl::blas::cublas::row_major::her2,
-    oneapi::mkl::blas::cublas::row_major::hpmv,
-    oneapi::mkl::blas::cublas::row_major::hpmv,
-    oneapi::mkl::blas::cublas::row_major::hpr,
-    oneapi::mkl::blas::cublas::row_major::hpr,
-    oneapi::mkl::blas::cublas::row_major::hpr2,
-    oneapi::mkl::blas::cublas::row_major::hpr2,
-    oneapi::mkl::blas::cublas::row_major::sbmv,
-    oneapi::mkl::blas::cublas::row_major::sbmv,
-    oneapi::mkl::blas::cublas::row_major::spmv,
-    oneapi::mkl::blas::cublas::row_major::spmv,
-    oneapi::mkl::blas::cublas::row_major::spr,
-    oneapi::mkl::blas::cublas::row_major::spr,
-    oneapi::mkl::blas::cublas::row_major::spr2,
-    oneapi::mkl::blas::cublas::row_major::spr2,
-    oneapi::mkl::blas::cublas::row_major::symv,
-    oneapi::mkl::blas::cublas::row_major::symv,
-    oneapi::mkl::blas::cublas::row_major::syr,
-    oneapi::mkl::blas::cublas::row_major::syr,
-    oneapi::mkl::blas::cublas::row_major::syr2,
-    oneapi::mkl::blas::cublas::row_major::syr2,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbmv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tbsv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpmv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::tpsv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trmv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::trsv,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::gemm,
-    oneapi::mkl::blas::cublas::row_major::hemm,
-    oneapi::mkl::blas::cublas::row_major::hemm,
-    oneapi::mkl::blas::cublas::row_major::herk,
-    oneapi::mkl::blas::cublas::row_major::herk,
-    oneapi::mkl::blas::cublas::row_major::her2k,
-    oneapi::mkl::blas::cublas::row_major::her2k,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::symm,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syrk_batch,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::syr2k,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trmm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::trsm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemm_batch,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemmt,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::gemm_bias,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatadd_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::omatcopy2,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::imatcopy,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatadd,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dotc,
+    oneapi::math::blas::cublas::column_major::dotc,
+    oneapi::math::blas::cublas::column_major::dotu,
+    oneapi::math::blas::cublas::column_major::dotu,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotm,
+    oneapi::math::blas::cublas::column_major::rotm,
+    oneapi::math::blas::cublas::column_major::rotmg,
+    oneapi::math::blas::cublas::column_major::rotmg,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::sdsdot,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::ger,
+    oneapi::math::blas::cublas::column_major::ger,
+    oneapi::math::blas::cublas::column_major::gerc,
+    oneapi::math::blas::cublas::column_major::gerc,
+    oneapi::math::blas::cublas::column_major::geru,
+    oneapi::math::blas::cublas::column_major::geru,
+    oneapi::math::blas::cublas::column_major::hbmv,
+    oneapi::math::blas::cublas::column_major::hbmv,
+    oneapi::math::blas::cublas::column_major::hemv,
+    oneapi::math::blas::cublas::column_major::hemv,
+    oneapi::math::blas::cublas::column_major::her,
+    oneapi::math::blas::cublas::column_major::her,
+    oneapi::math::blas::cublas::column_major::her2,
+    oneapi::math::blas::cublas::column_major::her2,
+    oneapi::math::blas::cublas::column_major::hpmv,
+    oneapi::math::blas::cublas::column_major::hpmv,
+    oneapi::math::blas::cublas::column_major::hpr,
+    oneapi::math::blas::cublas::column_major::hpr,
+    oneapi::math::blas::cublas::column_major::hpr2,
+    oneapi::math::blas::cublas::column_major::hpr2,
+    oneapi::math::blas::cublas::column_major::sbmv,
+    oneapi::math::blas::cublas::column_major::sbmv,
+    oneapi::math::blas::cublas::column_major::spmv,
+    oneapi::math::blas::cublas::column_major::spmv,
+    oneapi::math::blas::cublas::column_major::spr,
+    oneapi::math::blas::cublas::column_major::spr,
+    oneapi::math::blas::cublas::column_major::spr2,
+    oneapi::math::blas::cublas::column_major::spr2,
+    oneapi::math::blas::cublas::column_major::symv,
+    oneapi::math::blas::cublas::column_major::symv,
+    oneapi::math::blas::cublas::column_major::syr,
+    oneapi::math::blas::cublas::column_major::syr,
+    oneapi::math::blas::cublas::column_major::syr2,
+    oneapi::math::blas::cublas::column_major::syr2,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::hemm,
+    oneapi::math::blas::cublas::column_major::hemm,
+    oneapi::math::blas::cublas::column_major::herk,
+    oneapi::math::blas::cublas::column_major::herk,
+    oneapi::math::blas::cublas::column_major::her2k,
+    oneapi::math::blas::cublas::column_major::her2k,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::asum,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpy_batch,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::axpby,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::copy_batch,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dot,
+    oneapi::math::blas::cublas::column_major::dotc,
+    oneapi::math::blas::cublas::column_major::dotc,
+    oneapi::math::blas::cublas::column_major::dotu,
+    oneapi::math::blas::cublas::column_major::dotu,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamin,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::iamax,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::nrm2,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rot,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotg,
+    oneapi::math::blas::cublas::column_major::rotm,
+    oneapi::math::blas::cublas::column_major::rotm,
+    oneapi::math::blas::cublas::column_major::rotmg,
+    oneapi::math::blas::cublas::column_major::rotmg,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::scal,
+    oneapi::math::blas::cublas::column_major::sdsdot,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::swap,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gbmv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::gemv_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::dgmm_batch,
+    oneapi::math::blas::cublas::column_major::ger,
+    oneapi::math::blas::cublas::column_major::ger,
+    oneapi::math::blas::cublas::column_major::gerc,
+    oneapi::math::blas::cublas::column_major::gerc,
+    oneapi::math::blas::cublas::column_major::geru,
+    oneapi::math::blas::cublas::column_major::geru,
+    oneapi::math::blas::cublas::column_major::hbmv,
+    oneapi::math::blas::cublas::column_major::hbmv,
+    oneapi::math::blas::cublas::column_major::hemv,
+    oneapi::math::blas::cublas::column_major::hemv,
+    oneapi::math::blas::cublas::column_major::her,
+    oneapi::math::blas::cublas::column_major::her,
+    oneapi::math::blas::cublas::column_major::her2,
+    oneapi::math::blas::cublas::column_major::her2,
+    oneapi::math::blas::cublas::column_major::hpmv,
+    oneapi::math::blas::cublas::column_major::hpmv,
+    oneapi::math::blas::cublas::column_major::hpr,
+    oneapi::math::blas::cublas::column_major::hpr,
+    oneapi::math::blas::cublas::column_major::hpr2,
+    oneapi::math::blas::cublas::column_major::hpr2,
+    oneapi::math::blas::cublas::column_major::sbmv,
+    oneapi::math::blas::cublas::column_major::sbmv,
+    oneapi::math::blas::cublas::column_major::spmv,
+    oneapi::math::blas::cublas::column_major::spmv,
+    oneapi::math::blas::cublas::column_major::spr,
+    oneapi::math::blas::cublas::column_major::spr,
+    oneapi::math::blas::cublas::column_major::spr2,
+    oneapi::math::blas::cublas::column_major::spr2,
+    oneapi::math::blas::cublas::column_major::symv,
+    oneapi::math::blas::cublas::column_major::symv,
+    oneapi::math::blas::cublas::column_major::syr,
+    oneapi::math::blas::cublas::column_major::syr,
+    oneapi::math::blas::cublas::column_major::syr2,
+    oneapi::math::blas::cublas::column_major::syr2,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbmv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tbsv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpmv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::tpsv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trmv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::trsv,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::gemm,
+    oneapi::math::blas::cublas::column_major::hemm,
+    oneapi::math::blas::cublas::column_major::hemm,
+    oneapi::math::blas::cublas::column_major::herk,
+    oneapi::math::blas::cublas::column_major::herk,
+    oneapi::math::blas::cublas::column_major::her2k,
+    oneapi::math::blas::cublas::column_major::her2k,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::symm,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syrk_batch,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::syr2k,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trmm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::trsm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemm_batch,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemmt,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::gemm_bias,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatadd_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::omatcopy2,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::imatcopy,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatadd,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::omatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::column_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dotc,
+    oneapi::math::blas::cublas::row_major::dotc,
+    oneapi::math::blas::cublas::row_major::dotu,
+    oneapi::math::blas::cublas::row_major::dotu,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotm,
+    oneapi::math::blas::cublas::row_major::rotm,
+    oneapi::math::blas::cublas::row_major::rotmg,
+    oneapi::math::blas::cublas::row_major::rotmg,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::sdsdot,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::ger,
+    oneapi::math::blas::cublas::row_major::ger,
+    oneapi::math::blas::cublas::row_major::gerc,
+    oneapi::math::blas::cublas::row_major::gerc,
+    oneapi::math::blas::cublas::row_major::geru,
+    oneapi::math::blas::cublas::row_major::geru,
+    oneapi::math::blas::cublas::row_major::hbmv,
+    oneapi::math::blas::cublas::row_major::hbmv,
+    oneapi::math::blas::cublas::row_major::hemv,
+    oneapi::math::blas::cublas::row_major::hemv,
+    oneapi::math::blas::cublas::row_major::her,
+    oneapi::math::blas::cublas::row_major::her,
+    oneapi::math::blas::cublas::row_major::her2,
+    oneapi::math::blas::cublas::row_major::her2,
+    oneapi::math::blas::cublas::row_major::hpmv,
+    oneapi::math::blas::cublas::row_major::hpmv,
+    oneapi::math::blas::cublas::row_major::hpr,
+    oneapi::math::blas::cublas::row_major::hpr,
+    oneapi::math::blas::cublas::row_major::hpr2,
+    oneapi::math::blas::cublas::row_major::hpr2,
+    oneapi::math::blas::cublas::row_major::sbmv,
+    oneapi::math::blas::cublas::row_major::sbmv,
+    oneapi::math::blas::cublas::row_major::spmv,
+    oneapi::math::blas::cublas::row_major::spmv,
+    oneapi::math::blas::cublas::row_major::spr,
+    oneapi::math::blas::cublas::row_major::spr,
+    oneapi::math::blas::cublas::row_major::spr2,
+    oneapi::math::blas::cublas::row_major::spr2,
+    oneapi::math::blas::cublas::row_major::symv,
+    oneapi::math::blas::cublas::row_major::symv,
+    oneapi::math::blas::cublas::row_major::syr,
+    oneapi::math::blas::cublas::row_major::syr,
+    oneapi::math::blas::cublas::row_major::syr2,
+    oneapi::math::blas::cublas::row_major::syr2,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::hemm,
+    oneapi::math::blas::cublas::row_major::hemm,
+    oneapi::math::blas::cublas::row_major::herk,
+    oneapi::math::blas::cublas::row_major::herk,
+    oneapi::math::blas::cublas::row_major::her2k,
+    oneapi::math::blas::cublas::row_major::her2k,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::asum,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpy_batch,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::axpby,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::copy_batch,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dot,
+    oneapi::math::blas::cublas::row_major::dotc,
+    oneapi::math::blas::cublas::row_major::dotc,
+    oneapi::math::blas::cublas::row_major::dotu,
+    oneapi::math::blas::cublas::row_major::dotu,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamin,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::iamax,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::nrm2,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rot,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotg,
+    oneapi::math::blas::cublas::row_major::rotm,
+    oneapi::math::blas::cublas::row_major::rotm,
+    oneapi::math::blas::cublas::row_major::rotmg,
+    oneapi::math::blas::cublas::row_major::rotmg,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::scal,
+    oneapi::math::blas::cublas::row_major::sdsdot,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::swap,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gbmv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::gemv_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::dgmm_batch,
+    oneapi::math::blas::cublas::row_major::ger,
+    oneapi::math::blas::cublas::row_major::ger,
+    oneapi::math::blas::cublas::row_major::gerc,
+    oneapi::math::blas::cublas::row_major::gerc,
+    oneapi::math::blas::cublas::row_major::geru,
+    oneapi::math::blas::cublas::row_major::geru,
+    oneapi::math::blas::cublas::row_major::hbmv,
+    oneapi::math::blas::cublas::row_major::hbmv,
+    oneapi::math::blas::cublas::row_major::hemv,
+    oneapi::math::blas::cublas::row_major::hemv,
+    oneapi::math::blas::cublas::row_major::her,
+    oneapi::math::blas::cublas::row_major::her,
+    oneapi::math::blas::cublas::row_major::her2,
+    oneapi::math::blas::cublas::row_major::her2,
+    oneapi::math::blas::cublas::row_major::hpmv,
+    oneapi::math::blas::cublas::row_major::hpmv,
+    oneapi::math::blas::cublas::row_major::hpr,
+    oneapi::math::blas::cublas::row_major::hpr,
+    oneapi::math::blas::cublas::row_major::hpr2,
+    oneapi::math::blas::cublas::row_major::hpr2,
+    oneapi::math::blas::cublas::row_major::sbmv,
+    oneapi::math::blas::cublas::row_major::sbmv,
+    oneapi::math::blas::cublas::row_major::spmv,
+    oneapi::math::blas::cublas::row_major::spmv,
+    oneapi::math::blas::cublas::row_major::spr,
+    oneapi::math::blas::cublas::row_major::spr,
+    oneapi::math::blas::cublas::row_major::spr2,
+    oneapi::math::blas::cublas::row_major::spr2,
+    oneapi::math::blas::cublas::row_major::symv,
+    oneapi::math::blas::cublas::row_major::symv,
+    oneapi::math::blas::cublas::row_major::syr,
+    oneapi::math::blas::cublas::row_major::syr,
+    oneapi::math::blas::cublas::row_major::syr2,
+    oneapi::math::blas::cublas::row_major::syr2,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbmv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tbsv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpmv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::tpsv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trmv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::trsv,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::gemm,
+    oneapi::math::blas::cublas::row_major::hemm,
+    oneapi::math::blas::cublas::row_major::hemm,
+    oneapi::math::blas::cublas::row_major::herk,
+    oneapi::math::blas::cublas::row_major::herk,
+    oneapi::math::blas::cublas::row_major::her2k,
+    oneapi::math::blas::cublas::row_major::her2k,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::symm,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syrk_batch,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::syr2k,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trmm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::trsm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemm_batch,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemmt,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::gemm_bias,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatadd_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::omatcopy2,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::imatcopy,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatadd,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::omatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
+    oneapi::math::blas::cublas::row_major::imatcopy_batch,
 };
diff --git a/src/blas/backends/mkl_common/mkl_batch.cxx b/src/blas/backends/mkl_common/mkl_batch.cxx
index 4bd9076b8..cb97b9693 100644
--- a/src/blas/backends/mkl_common/mkl_batch.cxx
+++ b/src/blas/backends/mkl_common/mkl_batch.cxx
@@ -22,67 +22,77 @@
 void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer<float, 1>& x, int64_t incx,
                 std::int64_t stridex, sycl::buffer<float, 1>& y, int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
-    blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer<double, 1>& x, int64_t incx,
                 std::int64_t stridex, sycl::buffer<double, 1>& y, int64_t incy,
                 std::int64_t stridey, std::int64_t batch_size) {
-    blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer<std::complex<float>, 1>& x,
                 int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer<std::complex<double>, 1>& x,
                 int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
-    blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer<double, 1>& x,
                 int64_t incx, int64_t stridex, sycl::buffer<double, 1>& y, int64_t incy,
                 int64_t stridey, int64_t batch_size) {
-    blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer<float, 1>& x, int64_t incx,
                 int64_t stridex, sycl::buffer<float, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void axpy_batch(sycl::queue& queue, int64_t n, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void axpy_batch(sycl::queue& queue, int64_t n, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& x, int64_t incx, int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, int64_t incy, int64_t stridey,
                 int64_t batch_size) {
-    blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size));
 }
 
 void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha,
                 sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a, sycl::buffer<float, 1>& x,
                 int64_t incx, int64_t stride_x, float beta, sycl::buffer<float, 1>& y, int64_t incy,
                 int64_t stride_y, int64_t batch_size) {
-    blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y,
-                           incy, stride_y, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m,
+                                                     n, alpha, a, lda, stride_a, x, incx, stride_x,
+                                                     beta, y, incy, stride_y, batch_size));
 }
 
 void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha,
                 sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<double, 1>& x, int64_t incx, int64_t stride_x, double beta,
                 sycl::buffer<double, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) {
-    blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y,
-                           incy, stride_y, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m,
+                                                     n, alpha, a, lda, stride_a, x, incx, stride_x,
+                                                     beta, y, incy, stride_y, batch_size));
 }
 
 void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
@@ -90,8 +100,9 @@ void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
                 int64_t stride_a, sycl::buffer<std::complex<float>, 1>& x, int64_t incx,
                 int64_t stride_x, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
                 int64_t incy, int64_t stride_y, int64_t batch_size) {
-    blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y,
-                           incy, stride_y, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m,
+                                                     n, alpha, a, lda, stride_a, x, incx, stride_x,
+                                                     beta, y, incy, stride_y, batch_size));
 }
 
 void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
@@ -100,24 +111,27 @@ void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
                 int64_t stride_x, std::complex<double> beta,
                 sycl::buffer<std::complex<double>, 1>& y, int64_t incy, int64_t stride_y,
                 int64_t batch_size) {
-    blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y,
-                           incy, stride_y, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m,
+                                                     n, alpha, a, lda, stride_a, x, incx, stride_x,
+                                                     beta, y, incy, stride_y, batch_size));
 }
 
 void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
                 sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a, sycl::buffer<float, 1>& x,
                 int64_t incx, int64_t stride_x, sycl::buffer<float, 1>& c, int64_t ldc,
                 int64_t stride_c, int64_t batch_size) {
-    blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m,
+                                                     n, a, lda, stride_a, x, incx, stride_x, c, ldc,
+                                                     stride_c, batch_size));
 }
 
 void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
                 sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<double, 1>& x, int64_t incx, int64_t stride_x,
                 sycl::buffer<double, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m,
+                                                     n, a, lda, stride_a, x, incx, stride_x, c, ldc,
+                                                     stride_c, batch_size));
 }
 
 void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
@@ -125,8 +139,9 @@ void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, int64_t incx, int64_t stride_x,
                 sycl::buffer<std::complex<float>, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m,
+                                                     n, a, lda, stride_a, x, incx, stride_x, c, ldc,
+                                                     stride_c, batch_size));
 }
 
 void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
@@ -134,24 +149,27 @@ void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, int64_t incx, int64_t stride_x,
                 sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m,
+                                                     n, a, lda, stride_a, x, incx, stride_x, c, ldc,
+                                                     stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                 int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<float, 1>& b, int64_t ldb, int64_t stride_b, float beta,
                 sycl::buffer<float, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                 int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<double, 1>& b, int64_t ldb, int64_t stride_b, double beta,
                 sycl::buffer<double, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -159,8 +177,9 @@ void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t
                 int64_t lda, int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
                 int64_t stride_b, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
                 int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -169,8 +188,9 @@ void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t
                 int64_t ldb, int64_t stride_b, std::complex<double> beta,
                 sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -178,8 +198,9 @@ void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t
                 int64_t stride_a, sycl::buffer<sycl::half, 1>& b, int64_t ldb, int64_t stride_b,
                 sycl::half beta, sycl::buffer<sycl::half, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -187,8 +208,9 @@ void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t
                 int64_t stride_a, sycl::buffer<sycl::half, 1>& b, int64_t ldb, int64_t stride_b,
                 float beta, sycl::buffer<float, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -205,24 +227,29 @@ void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t
                 int64_t stride_a, sycl::buffer<std::int8_t, 1>& b, int64_t ldb, int64_t stride_b,
                 float beta, sycl::buffer<std::int32_t, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                           stride_b, beta, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size));
 }
 
 void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                 diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer<float, 1>& a,
                 int64_t lda, int64_t stride_a, sycl::buffer<float, 1>& b, int64_t ldb,
                 int64_t stride_b, int64_t batch_size) {
-    blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
-                           stride_a, b, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size));
 }
 
 void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                 diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer<double, 1>& a,
                 int64_t lda, int64_t stride_a, sycl::buffer<double, 1>& b, int64_t ldb,
                 int64_t stride_b, int64_t batch_size) {
-    blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
-                           stride_a, b, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size));
 }
 
 void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
@@ -230,8 +257,10 @@ void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose
                 sycl::buffer<std::complex<float>, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, int64_t stride_b,
                 int64_t batch_size) {
-    blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
-                           stride_a, b, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size));
 }
 
 void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
@@ -239,31 +268,36 @@ void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose
                 sycl::buffer<std::complex<double>, 1>& a, int64_t lda, int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, int64_t stride_b,
                 int64_t batch_size) {
-    blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda,
-                           stride_a, b, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size));
 }
 
 void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
                 float alpha, sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a, float beta,
                 sycl::buffer<float, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size));
 }
 
 void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
                 double alpha, sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                 double beta, sycl::buffer<double, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size));
 }
 
 void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
                 int64_t stride_a, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
                 int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size));
 }
 
 void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
@@ -271,78 +305,89 @@ void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n
                 int64_t stride_a, std::complex<double> beta,
                 sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stride_c,
                 int64_t batch_size) {
-    blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc,
-                           stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size));
 }
 
 void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                     sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a,
                     sycl::buffer<float, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) {
-    blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                               batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans),
+                                                         m, n, alpha, a, lda, stride_a, b, ldb,
+                                                         stride_b, batch_size));
 }
 
 void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                     sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                     sycl::buffer<double, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) {
-    blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                               batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans),
+                                                         m, n, alpha, a, lda, stride_a, b, ldb,
+                                                         stride_b, batch_size));
 }
 
 void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                     std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
                     int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
                     int64_t stride_b, int64_t batch_size) {
-    blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                               batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans),
+                                                         m, n, alpha, a, lda, stride_a, b, ldb,
+                                                         stride_b, batch_size));
 }
 
 void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                     std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                     int64_t lda, int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     int64_t ldb, int64_t stride_b, int64_t batch_size) {
-    blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                               batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans),
+                                                         m, n, alpha, a, lda, stride_a, b, ldb,
+                                                         stride_b, batch_size));
 }
 
 void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                     sycl::buffer<float, 1>& ab, int64_t lda, int64_t ldb, int64_t stride,
                     int64_t batch_size) {
-    blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(
+        queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size));
 }
 
 void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                     sycl::buffer<double, 1>& ab, int64_t lda, int64_t ldb, int64_t stride,
                     int64_t batch_size) {
-    blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(
+        queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size));
 }
 
 void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                     std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
                     int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) {
-    blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(
+        queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size));
 }
 
 void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                     std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
                     int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) {
-    blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(
+        queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size));
 }
 
 void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                    float alpha, sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a,
                    float beta, sycl::buffer<float, 1>& b, int64_t ldb, int64_t stride_b,
                    sycl::buffer<float, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb,
-                              stride_b, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size));
 }
 
 void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                    double alpha, sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
                    double beta, sycl::buffer<double, 1>& b, int64_t ldb, int64_t stride_b,
                    sycl::buffer<double, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) {
-    blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb,
-                              stride_b, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size));
 }
 
 void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -351,8 +396,9 @@ void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64
                    sycl::buffer<std::complex<float>, 1>& b, int64_t ldb, int64_t stride_b,
                    sycl::buffer<std::complex<float>, 1>& c, int64_t ldc, int64_t stride_c,
                    int64_t batch_size) {
-    blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb,
-                              stride_b, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size));
 }
 
 void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -361,8 +407,9 @@ void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64
                    sycl::buffer<std::complex<double>, 1>& b, int64_t ldb, int64_t stride_b,
                    sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stride_c,
                    int64_t batch_size) {
-    blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb,
-                              stride_b, c, ldc, stride_c, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size));
 }
 
 // USM APIs
@@ -370,127 +417,128 @@ void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64
 sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx,
                        std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx,
                        std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex<float>* x, int64_t incx,
                        std::int64_t stridex, std::complex<float>* y, int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex<double>* x, int64_t incx,
                        std::int64_t stridex, std::complex<double>* y, int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex<float>** x, int64_t* incx,
                        std::complex<float>** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies));
 }
 
 sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex<double>** x,
                        int64_t* incx, std::complex<double>** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx,
                        int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx,
                        int64_t stridex, double* y, int64_t incy, int64_t stridey,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex<float> alpha,
                        const std::complex<float>* x, int64_t incx, int64_t stridex,
                        std::complex<float>* y, int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex<double> alpha,
                        const std::complex<double>* x, int64_t incx, int64_t stridex,
                        std::complex<double>* y, int64_t incy, int64_t stridey, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy,
+                                                         stridey, batch_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx,
                        float** y, int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy,
+                                                         group_count, group_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x,
                        int64_t* incx, double** y, int64_t* incy, int64_t group_count,
                        int64_t* group_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy,
+                                                         group_count, group_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex<float>* alpha,
                        const std::complex<float>** x, int64_t* incx, std::complex<float>** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy,
+                                                         group_count, group_size, dependencies));
 }
 
 sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex<double>* alpha,
                        const std::complex<double>** x, int64_t* incx, std::complex<double>** y,
                        int64_t* incy, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size,
-                                  dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy,
+                                                         group_count, group_size, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha,
                        const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx,
                        int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x,
-                                  beta, y, incy, stride_y, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(
+        queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx,
+        stride_x, beta, y, incy, stride_y, batch_size, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha,
@@ -498,8 +546,9 @@ sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t
                        int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy,
                        int64_t stride_y, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x,
-                                  beta, y, incy, stride_y, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(
+        queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx,
+        stride_x, beta, y, incy, stride_y, batch_size, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
@@ -508,8 +557,9 @@ sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t
                        int64_t stride_x, std::complex<float> beta, std::complex<float>* y,
                        int64_t incy, int64_t stride_y, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x,
-                                  beta, y, incy, stride_y, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(
+        queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx,
+        stride_x, beta, y, incy, stride_y, batch_size, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n,
@@ -518,24 +568,27 @@ sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t
                        int64_t stride_x, std::complex<double> beta, std::complex<double>* y,
                        int64_t incy, int64_t stride_y, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x,
-                                  beta, y, incy, stride_y, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(
+        queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx,
+        stride_x, beta, y, incy, stride_y, batch_size, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha,
                        const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta,
                        float** y, int64_t* incy, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x,
+                               incx, beta, y, incy, group_count, groupsize, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha,
                        const double** a, int64_t* lda, const double** x, int64_t* incx,
                        double* beta, double** y, int64_t* incy, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x,
+                               incx, beta, y, incy, group_count, groupsize, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n,
@@ -543,8 +596,9 @@ sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_
                        const std::complex<float>** x, int64_t* incx, std::complex<float>* beta,
                        std::complex<float>** y, int64_t* incy, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x,
+                               incx, beta, y, incy, group_count, groupsize, dependencies));
 }
 
 sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n,
@@ -552,24 +606,27 @@ sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_
                        const std::complex<double>** x, int64_t* incx, std::complex<double>* beta,
                        std::complex<double>** y, int64_t* incy, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x,
+                               incx, beta, y, incy, group_count, groupsize, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a,
                        int64_t lda, int64_t stride_a, const float* x, int64_t incx,
                        int64_t stride_x, float* c, int64_t ldc, int64_t stride_c,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a,
+                               x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a,
                        int64_t lda, int64_t stride_a, const double* x, int64_t incx,
                        int64_t stride_x, double* c, int64_t ldc, int64_t stride_c,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a,
+                               x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
@@ -577,8 +634,9 @@ sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n
                        const std::complex<float>* x, int64_t incx, int64_t stride_x,
                        std::complex<float>* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a,
+                               x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n,
@@ -586,40 +644,45 @@ sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n
                        const std::complex<double>* x, int64_t incx, int64_t stride_x,
                        std::complex<double>* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a,
+                               x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n,
                        const float** a, int64_t* lda, const float** x, int64_t* incx, float** c,
                        int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count,
-                                  groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right),
+                                                         m, n, a, lda, x, incx, c, ldc, group_count,
+                                                         groupsize, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n,
                        const double** a, int64_t* lda, const double** x, int64_t* incx, double** c,
                        int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count,
-                                  groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right),
+                                                         m, n, a, lda, x, incx, c, ldc, group_count,
+                                                         groupsize, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n,
                        const std::complex<float>** a, int64_t* lda, const std::complex<float>** x,
                        int64_t* incx, std::complex<float>** c, int64_t* ldc, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count,
-                                  groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right),
+                                                         m, n, a, lda, x, incx, c, ldc, group_count,
+                                                         groupsize, dependencies));
 }
 
 sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n,
                        const std::complex<double>** a, int64_t* lda, const std::complex<double>** x,
                        int64_t* incx, std::complex<double>** c, int64_t* ldc, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count,
-                                  groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right),
+                                                         m, n, a, lda, x, incx, c, ldc, group_count,
+                                                         groupsize, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -627,8 +690,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        const float* b, int64_t ldb, int64_t stride_b, float beta, float* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -636,8 +701,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        const double* b, int64_t ldb, int64_t stride_b, double beta, double* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -646,8 +713,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        int64_t stride_b, std::complex<float> beta, std::complex<float>* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -656,8 +725,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        int64_t stride_b, std::complex<double> beta, std::complex<double>* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -665,8 +736,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b,
                        sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -674,8 +747,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -692,8 +767,10 @@ sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, i
                        const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta,
                        std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb,
-                                  stride_b, beta, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -701,8 +778,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, group_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -710,8 +788,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc,
                        int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, group_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -720,8 +799,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        int64_t* ldb, std::complex<float>* beta, std::complex<float>** c,
                        int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, group_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -730,8 +810,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        int64_t* ldb, std::complex<double>* beta, std::complex<double>** c,
                        int64_t* ldc, int64_t group_count, int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, group_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -739,8 +820,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta,
                        sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -748,8 +830,9 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc,
                        int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m,
@@ -766,24 +849,29 @@ sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb,
                        const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c,
                        int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c,
-                                  ldc, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                        diag unit_diag, int64_t m, int64_t n, float alpha, const float* a,
                        int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                        diag unit_diag, int64_t m, int64_t n, double alpha, const double* a,
                        int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b,
                        int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
@@ -791,8 +879,10 @@ sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, tr
                        const std::complex<float>* a, int64_t lda, int64_t stride_a,
                        std::complex<float>* b, int64_t ldb, int64_t stride_b, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
@@ -800,24 +890,30 @@ sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, tr
                        const std::complex<double>* a, int64_t lda, int64_t stride_a,
                        std::complex<double>* b, int64_t ldb, int64_t stride_b, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans,
                        diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a,
                        int64_t* lda, float** b, int64_t* ldb, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, b, ldb, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans,
                        diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a,
                        int64_t* lda, double** b, int64_t* ldb, int64_t group_count,
                        int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, b, ldb, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans,
@@ -825,8 +921,10 @@ sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower,
                        const std::complex<float>** a, int64_t* lda, std::complex<float>** b,
                        int64_t* ldb, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, b, ldb, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans,
@@ -834,24 +932,28 @@ sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower,
                        const std::complex<double>** a, int64_t* lda, std::complex<double>** b,
                        int64_t* ldb, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a,
-                                  lda, b, ldb, group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
                        float alpha, const float* a, int64_t lda, int64_t stride_a, float beta,
                        float* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
                        double alpha, const double* a, int64_t lda, int64_t stride_a, double beta,
                        double* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
@@ -859,8 +961,9 @@ sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, in
                        int64_t stride_a, std::complex<float> beta, std::complex<float>* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
@@ -868,24 +971,27 @@ sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, in
                        int64_t stride_a, std::complex<double> beta, std::complex<double>* c,
                        int64_t ldc, int64_t stride_c, int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c,
-                                  ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n,
                        int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta,
                        float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n,
                        int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta,
                        double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n,
@@ -893,8 +999,9 @@ sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans,
                        int64_t* lda, std::complex<float>* beta, std::complex<float>** c,
                        int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n,
@@ -902,70 +1009,79 @@ sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans,
                        int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        int64_t* ldc, int64_t group_count, int64_t* groupsize,
                        const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                                  group_count, groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                            const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb,
                            int64_t stride_b, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                                      batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                            const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb,
                            int64_t stride_b, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                                      batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                            std::complex<float> alpha, const std::complex<float>* a, int64_t lda,
                            int64_t stride_a, std::complex<float>* b, int64_t ldb, int64_t stride_b,
                            int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                                      batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                            std::complex<double> alpha, const std::complex<double>* a, int64_t lda,
                            int64_t stride_a, std::complex<double>* b, int64_t ldb, int64_t stride_b,
                            int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b,
-                                      batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   stride_a, b, ldb, stride_b, batch_size, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                            float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size,
-                                      dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, stride, batch_size, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                            double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size,
-                                      dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, stride, batch_size, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                            std::complex<float> alpha, std::complex<float>* ab, int64_t lda,
                            int64_t ldb, int64_t stride, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size,
-                                      dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, stride, batch_size, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                            std::complex<double> alpha, std::complex<double>* ab, int64_t lda,
                            int64_t ldb, int64_t stride, int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size,
-                                      dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, stride, batch_size, dependencies));
 }
 
 sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m,
@@ -973,8 +1089,10 @@ sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb
                           float beta, const float* b, int64_t ldb, int64_t stride_b, float* c,
                           int64_t ldc, int64_t stride_c, int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b,
-                                     ldb, stride_b, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m,
@@ -982,8 +1100,10 @@ sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb
                           double beta, const double* b, int64_t ldb, int64_t stride_b, double* c,
                           int64_t ldc, int64_t stride_c, int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b,
-                                     ldb, stride_b, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m,
@@ -992,8 +1112,10 @@ sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb
                           const std::complex<float>* b, int64_t ldb, int64_t stride_b,
                           std::complex<float>* c, int64_t ldc, int64_t stride_c, int64_t batch_size,
                           const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b,
-                                     ldb, stride_b, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m,
@@ -1002,32 +1124,37 @@ sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb
                           const std::complex<double>* b, int64_t ldb, int64_t stride_b,
                           std::complex<double>* c, int64_t ldc, int64_t stride_c,
                           int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b,
-                                     ldb, stride_b, c, ldc, stride_c, batch_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size,
+        dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb,
                            int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, b, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb,
                            int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, b, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            std::complex<float>* alpha, const std::complex<float>** a, int64_t* lda,
                            std::complex<float>** b, int64_t* ldb, int64_t group_count,
                            int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, b, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
@@ -1035,38 +1162,43 @@ sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int
                            int64_t* lda, std::complex<double>** b, int64_t* ldb,
                            int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, b, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda,
+                                   b, ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            float* alpha, float** ab, int64_t* lda, int64_t* ldb,
                            int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            double* alpha, double** ab, int64_t* lda, int64_t* ldb,
                            int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            std::complex<float>* alpha, std::complex<float>** ab, int64_t* lda,
                            int64_t* ldb, int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, group_count, groupsize, dependencies));
 }
 
 sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n,
                            std::complex<double>* alpha, std::complex<double>** ab, int64_t* lda,
                            int64_t* ldb, int64_t group_count, int64_t* groupsize,
                            const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, group_count,
-                                      groupsize, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda,
+                                   ldb, group_count, groupsize, dependencies));
 }
diff --git a/src/blas/backends/mkl_common/mkl_blas_backend.hpp b/src/blas/backends/mkl_common/mkl_blas_backend.hpp
index d45208a6d..4298b3503 100644
--- a/src/blas/backends/mkl_common/mkl_blas_backend.hpp
+++ b/src/blas/backends/mkl_common/mkl_blas_backend.hpp
@@ -21,11 +21,15 @@
 
 #include <complex>
 
-#include "mkl_version.h"
-#include "oneapi/mkl/types.hpp"
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
+#include <mkl/blas.hpp>
+
+#include "common_onemkl_conversion.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 template <typename T>
 class value_or_pointer {
@@ -65,20 +69,5 @@ class value_or_pointer {
     }
 };
 
-namespace blas {
-
-namespace column_major {
-
-#include "mkl_blas_backend.hxx"
-
-}
-
-namespace row_major {
-
-#include "mkl_blas_backend.hxx"
-
-}
-
-} // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mkl_common/mkl_blas_backend.hxx b/src/blas/backends/mkl_common/mkl_blas_backend.hxx
deleted file mode 100644
index ca0c036f1..000000000
--- a/src/blas/backends/mkl_common/mkl_blas_backend.hxx
+++ /dev/null
@@ -1,2518 +0,0 @@
-/*******************************************************************************
-* Copyright 2022 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-/// level3, buffer
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-          sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
-          std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-          sycl::buffer<double, 1>& b, std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-          std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-          std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-          std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-          sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, sycl::half beta,
-          sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-          sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
-          std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
-          sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
-          std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
-          sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta, sycl::buffer<bfloat16, 1>& c,
-          std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
-          sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, float beta,
-          sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc);
-
-void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-          std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a, std::int64_t lda,
-          sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
-          std::int64_t ldc);
-
-void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-          std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-          std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
-          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
-          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
-          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
-          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
-          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-          sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-          sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          float alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
-          sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-          double alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, double beta,
-          sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-
-void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-           std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-
-void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
-           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
-           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
-           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
-           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
-           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-          sycl::buffer<float, 1>& b, std::int64_t ldb);
-
-void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-          std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb);
-
-void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-
-void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-          sycl::buffer<float, 1>& b, std::int64_t ldb);
-
-void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-          std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb);
-
-void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-
-void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag,
-          std::int64_t m, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-// level 3, USM
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, const float* a,
-                 std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer<float> beta,
-                 float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<double> alpha, const double* a,
-                 std::int64_t lda, const double* b, std::int64_t ldb, value_or_pointer<double> beta,
-                 double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<float>> beta,
-                 std::complex<float>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<double>> beta,
-                 std::complex<double>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<sycl::half> alpha,
-                 const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
-                 value_or_pointer<sycl::half> beta, sycl::half* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, const sycl::half* a,
-                 std::int64_t lda, const sycl::half* b, std::int64_t ldb,
-                 value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, const bfloat16* a,
-                 std::int64_t lda, const bfloat16* b, std::int64_t ldb,
-                 value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, const bfloat16* a,
-                 std::int64_t lda, const bfloat16* b, std::int64_t ldb,
-                 value_or_pointer<float> beta, bfloat16* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                 const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb,
-                 value_or_pointer<float> beta, std::int32_t* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                 std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                 const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb,
-                 value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                 const float* b, std::int64_t ldb, value_or_pointer<float> beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                 const double* b, std::int64_t ldb, value_or_pointer<double> beta, double* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<float>> beta,
-                 std::complex<float>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<double>> beta,
-                 std::complex<double>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<float>> beta,
-                 std::complex<float>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
-                 std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                 std::int64_t ldb, value_or_pointer<std::complex<double>> beta,
-                 std::complex<double>* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                 value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                 value_or_pointer<double> beta, double* c, std::int64_t ldc,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* a, std::int64_t lda,
-                 value_or_pointer<std::complex<float>> beta, std::complex<float>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* a, std::int64_t lda,
-                 value_or_pointer<std::complex<double>> beta, std::complex<double>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<float> alpha, const std::complex<float>* a,
-                 std::int64_t lda, value_or_pointer<float> beta, std::complex<float>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                 std::int64_t k, value_or_pointer<double> alpha, const std::complex<double>* a,
-                 std::int64_t lda, value_or_pointer<double> beta, std::complex<double>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                  const float* b, std::int64_t ldb, value_or_pointer<float> beta, float* c,
-                  std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                  const double* b, std::int64_t ldb, value_or_pointer<double> beta, double* c,
-                  std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                  std::int64_t ldb, value_or_pointer<std::complex<float>> beta,
-                  std::complex<float>* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                  std::int64_t ldb, value_or_pointer<std::complex<double>> beta,
-                  std::complex<double>* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                  std::int64_t ldb, value_or_pointer<float> beta, std::complex<float>* c,
-                  std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                  std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                  std::int64_t ldb, value_or_pointer<double> beta, std::complex<double>* c,
-                  std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<float> alpha,
-                 const float* a, std::int64_t lda, float* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<double> alpha,
-                 const double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<float> alpha,
-                 const float* a, std::int64_t lda, float* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer<double> alpha,
-                 const double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                 diag unit_diag, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                 const std::vector<sycl::event>& dependencies = {});
-
-// level 2, buffer
-
-void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
-          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
-          float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
-          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
-          std::int64_t ku, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-          sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
-          std::int64_t incy);
-
-void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
-          std::int64_t ku, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-          sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
-          std::int64_t incy);
-
-void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
-          std::int64_t ku, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-          std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
-          std::int64_t ku, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-          std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-         std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
-         std::int64_t lda);
-
-void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
-         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-         std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda);
-
-void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-
-void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-
-void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-          std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-
-void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-
-void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-
-void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
-          std::int64_t incy);
-
-void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
-          std::int64_t incy);
-
-void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-         sycl::buffer<std::complex<float>, 1>& a);
-
-void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-         sycl::buffer<std::complex<double>, 1>& a);
-
-void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& a);
-
-void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& a);
-
-void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha,
-          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
-          float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha,
-          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
-          float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-         sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a, std::int64_t lda);
-
-void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
-         std::int64_t lda);
-
-void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-          std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda);
-
-void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda);
-
-void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-          sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
-          sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-          sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
-          sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-         sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a);
-
-void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a);
-
-void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha,
-          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
-          std::int64_t incy, sycl::buffer<float, 1>& a);
-
-void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha,
-          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
-          std::int64_t incy, sycl::buffer<double, 1>& a);
-
-void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-          std::int64_t incx);
-
-void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx);
-
-void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-          std::int64_t incx);
-
-void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx);
-
-void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          std::int64_t k, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx);
-
-void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
-
-void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx);
-
-void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx);
-
-void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx);
-
-void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx);
-
-void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx);
-
-void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx);
-
-void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-          std::int64_t incx);
-
-void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx);
-
-void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
-          std::int64_t incx);
-
-void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
-          std::int64_t incx);
-
-void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n,
-          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-// level 2, USM
-
-sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 value_or_pointer<float> alpha, const float* a, std::int64_t lda, const float* x,
-                 std::int64_t incx, value_or_pointer<float> beta, float* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 value_or_pointer<double> alpha, const double* a, std::int64_t lda, const double* x,
-                 std::int64_t incx, value_or_pointer<double> beta, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 std::int64_t kl, std::int64_t ku, value_or_pointer<float> alpha, const float* a,
-                 std::int64_t lda, const float* x, std::int64_t incx, value_or_pointer<float> beta,
-                 float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 std::int64_t kl, std::int64_t ku, value_or_pointer<double> alpha, const double* a,
-                 std::int64_t lda, const double* x, std::int64_t incx,
-                 value_or_pointer<double> beta, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 std::int64_t kl, std::int64_t ku, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* x,
-                 std::int64_t incx, value_or_pointer<std::complex<float>> beta,
-                 std::complex<float>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                 std::int64_t kl, std::int64_t ku, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* x,
-                 std::int64_t incx, value_or_pointer<std::complex<double>> beta,
-                 std::complex<double>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer<float> alpha,
-                const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a,
-                std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer<double> alpha,
-                const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a,
-                std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer<float> alpha,
-                const std::complex<float>* x, std::int64_t incx, std::complex<float>* a,
-                std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                value_or_pointer<double> alpha, const std::complex<double>* x, std::int64_t incx,
-                std::complex<double>* a, std::int64_t lda,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                 const std::complex<float>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                 const std::complex<double>* x, std::int64_t incx,
-                 value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer<float> alpha,
-                const std::complex<float>* x, std::int64_t incx, std::complex<float>* a,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                value_or_pointer<double> alpha, const std::complex<double>* x, std::int64_t incx,
-                std::complex<double>* a, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<float>> alpha, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* a, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<std::complex<double>> alpha, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* a, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-                 value_or_pointer<float> alpha, const float* a, std::int64_t lda, const float* x,
-                 std::int64_t incx, value_or_pointer<float> beta, float* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k,
-                 value_or_pointer<double> alpha, const double* a, std::int64_t lda, const double* x,
-                 std::int64_t incx, value_or_pointer<double> beta, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<float> alpha, const float* a, std::int64_t lda, const float* x,
-                 std::int64_t incx, value_or_pointer<float> beta, float* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<double> alpha, const double* a, std::int64_t lda, const double* x,
-                 std::int64_t incx, value_or_pointer<double> beta, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer<float> alpha,
-                const float* x, std::int64_t incx, float* a, std::int64_t lda,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                value_or_pointer<double> alpha, const double* x, std::int64_t incx, double* a,
-                std::int64_t lda, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<float> alpha, const float* x, std::int64_t incx, const float* y,
-                 std::int64_t incy, float* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<double> alpha, const double* x, std::int64_t incx,
-                 const double* y, std::int64_t incy, double* a, std::int64_t lda,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<float> alpha, const float* a, const float* x, std::int64_t incx,
-                 value_or_pointer<float> beta, float* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<double> alpha, const double* a, const double* x,
-                 std::int64_t incx, value_or_pointer<double> beta, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer<float> alpha,
-                const float* x, std::int64_t incx, float* a,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                value_or_pointer<double> alpha, const double* x, std::int64_t incx, double* a,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<float> alpha, const float* x, std::int64_t incx, const float* y,
-                 std::int64_t incy, float* a, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n,
-                 value_or_pointer<double> alpha, const double* x, std::int64_t incx,
-                 const double* y, std::int64_t incy, double* a,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const std::complex<float>* a, std::int64_t lda,
-                 std::complex<float>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const std::complex<double>* a, std::int64_t lda,
-                 std::complex<double>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const std::complex<float>* a, std::int64_t lda,
-                 std::complex<float>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, std::int64_t k, const std::complex<double>* a, std::int64_t lda,
-                 std::complex<double>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const float* a, float* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const double* a, double* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<float>* a, std::complex<float>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<double>* a, std::complex<double>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const float* a, float* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const double* a, double* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<float>* a, std::complex<float>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<double>* a, std::complex<double>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<float>* a, std::int64_t lda,
-                 std::complex<float>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<double>* a, std::int64_t lda,
-                 std::complex<double>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<float>* a, std::int64_t lda,
-                 std::complex<float>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
-                 std::int64_t n, const std::complex<double>* a, std::int64_t lda,
-                 std::complex<double>* x, std::int64_t incx,
-                 const std::vector<sycl::event>& dependencies = {});
-
-// level 1, buffer
-
-void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& result);
-
-void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& result);
-
-void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<float>, 1>& result);
-
-void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
-          sycl::buffer<std::complex<double>, 1>& result);
-
-void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-           sycl::buffer<std::int64_t, 1>& result, index_base base = index_base::zero);
-
-void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-           sycl::buffer<std::int64_t, 1>& result, index_base base = index_base::zero);
-
-void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-           std::int64_t incx, sycl::buffer<std::int64_t, 1>& result,
-           index_base base = index_base::zero);
-
-void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-           std::int64_t incx, sycl::buffer<std::int64_t, 1>& result,
-           index_base base = index_base::zero);
-
-void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-           sycl::buffer<std::int64_t, 1>& result, index_base base = index_base::zero);
-
-void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-           sycl::buffer<std::int64_t, 1>& result, index_base base = index_base::zero);
-
-void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-           std::int64_t incx, sycl::buffer<std::int64_t, 1>& result,
-           index_base base = index_base::zero);
-
-void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-           std::int64_t incx, sycl::buffer<std::int64_t, 1>& result,
-           index_base base = index_base::zero);
-
-void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result);
-
-void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<double, 1>& result);
-
-void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-          sycl::buffer<float, 1>& result);
-
-void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-          sycl::buffer<double, 1>& result);
-
-void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
-          std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
-          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-           std::int64_t incx, float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
-           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
-           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
-           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-          sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-          sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& result);
-
-void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-         sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& result);
-
-void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer<float, 1>& x,
-            std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-            sycl::buffer<float, 1>& result);
-
-void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-         sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& result);
-
-void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result);
-
-void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<double, 1>& result);
-
-void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-          sycl::buffer<float, 1>& result);
-
-void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-          sycl::buffer<double, 1>& result);
-
-void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-         std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c,
-         float s);
-
-void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-         std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c,
-         double s);
-
-void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-         sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s);
-
-void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-         sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s);
-
-void rotg(sycl::queue& queue, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& b,
-          sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s);
-
-void rotg(sycl::queue& queue, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& b,
-          sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s);
-
-void rotg(sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
-          sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
-          sycl::buffer<std::complex<float>, 1>& s);
-
-void rotg(sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
-          sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
-          sycl::buffer<std::complex<double>, 1>& s);
-
-void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-          sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& param);
-
-void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-          sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& param);
-
-void rotmg(sycl::queue& queue, sycl::buffer<float, 1>& d1, sycl::buffer<float, 1>& d2,
-           sycl::buffer<float, 1>& x1, float y1, sycl::buffer<float, 1>& param);
-
-void rotmg(sycl::queue& queue, sycl::buffer<double, 1>& d1, sycl::buffer<double, 1>& d2,
-           sycl::buffer<double, 1>& x1, double y1, sycl::buffer<double, 1>& param);
-
-void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-          std::int64_t incx);
-
-void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
-          std::int64_t incx);
-
-void scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-
-void scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx);
-
-void scal(sycl::queue& queue, std::int64_t n, double alpha,
-          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-
-void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-          sycl::buffer<float, 1>& y, std::int64_t incy);
-
-void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-          sycl::buffer<double, 1>& y, std::int64_t incy);
-
-void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-
-void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-          std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-
-// level 1, USM
-
-sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                 std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
-                 std::complex<float>* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                 std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
-                 std::complex<double>* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                  std::int64_t* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                  std::int64_t* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                  std::int64_t incx, std::int64_t* result,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                  std::int64_t incx, std::int64_t* result,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                  std::int64_t* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                  std::int64_t* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                  std::int64_t incx, std::int64_t* result,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                  std::int64_t incx, std::int64_t* result,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                 std::int64_t incx, float* result,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                 std::int64_t incx, double* result,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                 float* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                 double* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer<float> alpha, const float* x,
-                 std::int64_t incx, float* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer<double> alpha,
-                 const double* x, std::int64_t incx, double* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                 const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                 const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer<float> alpha, const float* x,
-                  std::int64_t incx, value_or_pointer<float> beta, float* y, std::int64_t incy,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer<double> alpha,
-                  const double* x, std::int64_t incx, value_or_pointer<double> beta, double* y,
-                  std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                  const std::complex<float>* x, std::int64_t incx,
-                  value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                  std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                  const std::complex<double>* x, std::int64_t incx,
-                  value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                  std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                 std::int64_t incx, std::complex<float>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                 std::int64_t incx, std::complex<double>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                const float* y, std::int64_t incy, float* result,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                const double* y, std::int64_t incy, double* result,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx,
-                   const float* y, std::int64_t incy, float* result,
-                   const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                const float* y, std::int64_t incy, double* result,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                 std::int64_t incx, float* result,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                 std::int64_t incx, double* result,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                 float* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                 double* result, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<float>* x, std::int64_t incx,
-                std::complex<float>* y, std::int64_t incy, value_or_pointer<float> c,
-                value_or_pointer<float> s, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<double>* x, std::int64_t incx,
-                std::complex<double>* y, std::int64_t incy, value_or_pointer<double> c,
-                value_or_pointer<double> s, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
-                std::int64_t incy, value_or_pointer<float> c, value_or_pointer<float> s,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
-                std::int64_t incy, value_or_pointer<double> c, value_or_pointer<double> s,
-                const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotg(sycl::queue& queue, std::complex<float>* a, std::complex<float>* b, float* c,
-                 std::complex<float>* s, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotg(sycl::queue& queue, std::complex<double>* a, std::complex<double>* b, double* c,
-                 std::complex<double>* s, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
-                 std::int64_t incy, const float* param,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
-                 std::int64_t incy, const double* param,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, value_or_pointer<float> y1,
-                  float* param, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1,
-                  value_or_pointer<double> y1, double* param,
-                  const std::vector<sycl::event>& dependencies = {});
-
-#define ONEMKL_DECLARE_SCAL(T, Ts)                                                         \
-    sycl::event scal(sycl::queue& queue, std::int64_t n, value_or_pointer<Ts> alpha, T* x, \
-                     std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-ONEMKL_DECLARE_SCAL(float, float)
-ONEMKL_DECLARE_SCAL(double, double)
-ONEMKL_DECLARE_SCAL(std::complex<float>, std::complex<float>)
-ONEMKL_DECLARE_SCAL(std::complex<double>, std::complex<double>)
-ONEMKL_DECLARE_SCAL(std::complex<float>, float)
-ONEMKL_DECLARE_SCAL(std::complex<double>, double)
-sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex<float>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex<double>* x,
-                 std::int64_t incx, const std::vector<sycl::event>& dependencies = {});
-
-#undef ONEMKL_DECLARE_SCAL
-
-sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
-                 std::int64_t incy, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<float>* x, std::int64_t incx,
-                 std::complex<float>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<double>* x, std::int64_t incx,
-                 std::complex<double>* y, std::int64_t incy,
-                 const std::vector<sycl::event>& dependencies = {});
-
-// extensions, buffer
-
-void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n,
-           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
-           std::int64_t ldc);
-
-void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n,
-           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-           sycl::buffer<double, 1>& b, std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
-           std::int64_t ldc);
-
-void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n,
-           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n,
-           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-               std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-               sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int8_t ao,
-               sycl::buffer<std::uint8_t, 1>& b, std::int64_t ldb, std::uint8_t bo, float beta,
-               sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
-               sycl::buffer<std::int32_t, 1>& co);
-
-void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-               std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-               sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int8_t ao,
-               sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int8_t bo, float beta,
-               sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
-               sycl::buffer<std::int32_t, 1>& co);
-
-void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-               std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-               sycl::buffer<std::uint8_t, 1>& a, std::int64_t lda, std::uint8_t ao,
-               sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int8_t bo, float beta,
-               sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
-               sycl::buffer<std::int32_t, 1>& co);
-
-void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-               std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-               sycl::buffer<std::uint8_t, 1>& a, std::int64_t lda, std::uint8_t ao,
-               sycl::buffer<std::uint8_t, 1>& b, std::int64_t ldb, std::uint8_t bo, float beta,
-               sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc,
-               sycl::buffer<std::int32_t, 1>& co);
-
-// extensions, USM
-
-sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
-                  std::int64_t n, std::int64_t k, value_or_pointer<float> alpha, const float* a,
-                  std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer<float> beta,
-                  float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
-                  std::int64_t n, std::int64_t k, value_or_pointer<double> alpha, const double* a,
-                  std::int64_t lda, const double* b, std::int64_t ldb,
-                  value_or_pointer<double> beta, double* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
-                  std::int64_t n, std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                  std::int64_t ldb, value_or_pointer<std::complex<float>> beta,
-                  std::complex<float>* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
-                  std::int64_t n, std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                  std::int64_t ldb, value_or_pointer<std::complex<double>> beta,
-                  std::complex<double>* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-                      std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                      const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b,
-                      std::int64_t ldb, std::uint8_t bo, value_or_pointer<float> beta,
-                      std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-                      std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                      const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b,
-                      std::int64_t ldb, std::int8_t bo, value_or_pointer<float> beta,
-                      std::int32_t* c, std::int64_t ldc, const std::int32_t* co,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-                      std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                      const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
-                      const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
-                      value_or_pointer<float> beta, std::int32_t* c, std::int64_t ldc,
-                      const std::int32_t* co, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
-                      std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                      const std::uint8_t* a, std::int64_t lda, std::uint8_t ao,
-                      const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
-                      value_or_pointer<float> beta, std::int32_t* c, std::int64_t ldc,
-                      const std::int32_t* co, const std::vector<sycl::event>& dependencies = {});
-
-// batch, buffer
-
-void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
-                float alpha, sycl::buffer<float, 1>& a, int64_t lda, int64_t stride_a, float beta,
-                sycl::buffer<float, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size);
-void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
-                double alpha, sycl::buffer<double, 1>& a, int64_t lda, int64_t stride_a,
-                double beta, sycl::buffer<double, 1>& c, int64_t ldc, int64_t stride_c,
-                int64_t batch_size);
-void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
-                std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
-                int64_t stride_a, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
-                int64_t ldc, int64_t stride_c, int64_t batch_size);
-void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k,
-                std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
-                int64_t stride_a, std::complex<double> beta,
-                sycl::buffer<std::complex<double>, 1>& c, int64_t ldc, int64_t stride_c,
-                int64_t batch_size);
-
-void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
-                std::int64_t stridex, sycl::buffer<float, 1>& y, std::int64_t incy,
-                std::int64_t stridey, std::int64_t batch_size);
-
-void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
-                std::int64_t stridex, sycl::buffer<double, 1>& y, std::int64_t incy,
-                std::int64_t stridey, std::int64_t batch_size);
-
-void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
-                std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
-                std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
-                std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
-                std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
-                sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex, float beta,
-                sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
-                sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex, double beta,
-                sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
-                std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
-                sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
-                std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
-                sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
-                std::int64_t batch_size);
-
-void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
-                std::int64_t batch_size);
-
-void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc, std::int64_t stridec,
-                std::int64_t batch_size);
-
-void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
-                sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc, std::int64_t stridec,
-                std::int64_t batch_size);
-
-void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
-                sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::int64_t batch_size);
-
-void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
-                sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::int64_t batch_size);
-
-void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                diag unit_diag, std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::int64_t batch_size);
-
-void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                diag unit_diag, std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::int64_t batch_size);
-
-void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
-                std::int64_t incx, std::int64_t stridex, sycl::buffer<float, 1>& y,
-                std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
-                std::int64_t incx, std::int64_t stridex, sycl::buffer<double, 1>& y,
-                std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-
-void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-                sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
-                sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
-                sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
-                std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer<double, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
-                std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, sycl::half beta,
-                sycl::buffer<sycl::half, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<bfloat16, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer<bfloat16, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<bfloat16, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta,
-                sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc, std::int64_t stride_c,
-                std::int64_t batch_size);
-
-void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::int8_t, 1>& a,
-                std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int8_t, 1>& b,
-                std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c,
-                std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                    sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
-                    std::int64_t batch_size);
-
-void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                    std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size);
-
-void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
-                    std::int64_t lda, std::int64_t stride_a,
-                    sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size);
-
-void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-                    std::int64_t lda, std::int64_t stride_a,
-                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size);
-
-void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb,
-                    std::int64_t stride, std::int64_t batch_size);
-
-void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb,
-                    std::int64_t stride, std::int64_t batch_size);
-
-void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
-                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                    std::int64_t batch_size);
-
-void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                    std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
-                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                    std::int64_t batch_size);
-
-void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                   std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                   std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                   std::int64_t stride_b, sycl::buffer<float, 1>& c, std::int64_t ldc,
-                   std::int64_t stride_c, std::int64_t batch_size);
-
-void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                   std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                   std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                   std::int64_t stride_b, sycl::buffer<double, 1>& c, std::int64_t ldc,
-                   std::int64_t stride_c, std::int64_t batch_size);
-
-void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                   std::int64_t n, std::complex<float> alpha,
-                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                   std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
-                   std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
-                   std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                   std::int64_t n, std::complex<double> alpha,
-                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                   std::int64_t stride_a, std::complex<double> beta,
-                   sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-                   std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
-                   std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);
-
-void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
-              sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
-              std::int64_t ldb);
-
-void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
-              sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
-              std::int64_t ldb);
-
-void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-
-void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-              std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-
-void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
-               sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
-               sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t strideb);
-
-void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
-               sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
-               sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t strideb);
-
-void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-               std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-               std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-               std::int64_t strideb);
-
-void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-               std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
-               std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
-               std::int64_t ldb, std::int64_t strideb);
-
-void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
-              sycl::buffer<float, 1>& ab, std::int64_t lda, std::int64_t ldb);
-
-void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
-              sycl::buffer<double, 1>& ab, std::int64_t lda, std::int64_t ldb);
-
-void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
-              std::int64_t ldb);
-
-void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
-              std::int64_t lda, std::int64_t ldb);
-
-void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-             float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
-             sycl::buffer<float, 1>& b, std::int64_t ldb, sycl::buffer<float, 1>& c,
-             std::int64_t ldc);
-
-void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-             double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
-             sycl::buffer<double, 1>& b, std::int64_t ldb, sycl::buffer<double, 1>& c,
-             std::int64_t ldc);
-
-void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-             std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
-             std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
-             sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-
-void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
-             std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-             std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
-             sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-
-// batch, usm
-
-sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans,
-                       const std::int64_t* n, const std::int64_t* k, const float* alpha,
-                       const float** a, const std::int64_t* lda, const float* beta, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans,
-                       const std::int64_t* n, const std::int64_t* k, const double* alpha,
-                       const double** a, const std::int64_t* lda, const double* beta, double** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans,
-                       const std::int64_t* n, const std::int64_t* k,
-                       const std::complex<float>* alpha, const std::complex<float>** a,
-                       const std::int64_t* lda, const std::complex<float>* beta,
-                       std::complex<float>** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans,
-                       const std::int64_t* n, const std::int64_t* k,
-                       const std::complex<double>* alpha, const std::complex<double>** a,
-                       const std::int64_t* lda, const std::complex<double>* beta,
-                       std::complex<double>** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                       std::int64_t k, value_or_pointer<float> alpha, const float* a,
-                       std::int64_t lda, std::int64_t stride_a, value_or_pointer<float> beta,
-                       float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                       std::int64_t k, value_or_pointer<double> alpha, const double* a,
-                       std::int64_t lda, std::int64_t stride_a, value_or_pointer<double> beta,
-                       double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                       std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                       const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                       value_or_pointer<std::complex<float>> beta, std::complex<float>* c,
-                       std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
-                       std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                       const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                       value_or_pointer<std::complex<double>> beta, std::complex<double>* c,
-                       std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
-                       std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
-                       std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
-                       std::int64_t incx, std::int64_t stridex, std::complex<float>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
-                       std::int64_t incx, std::int64_t stridex, std::complex<double>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const float** x,
-                       const std::int64_t* incx, float** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const double** x,
-                       const std::int64_t* incx, double** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex<float>** x,
-                       const std::int64_t* incx, std::complex<float>** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex<double>** x,
-                       const std::int64_t* incx, std::complex<double>** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                       const float* a, std::int64_t lda, std::int64_t stridea, const float* x,
-                       std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc,
-                       std::int64_t stridec, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                       const double* a, std::int64_t lda, std::int64_t stridea, const double* x,
-                       std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc,
-                       std::int64_t stridec, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                       const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
-                       const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
-                       std::complex<float>* c, std::int64_t ldc, std::int64_t stridec,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n,
-                       const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
-                       const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
-                       std::complex<double>* c, std::int64_t ldc, std::int64_t stridec,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m,
-                       const std::int64_t* n, const float** a, const std::int64_t* lda,
-                       const float** x, const std::int64_t* incx, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m,
-                       const std::int64_t* n, const double** a, const std::int64_t* lda,
-                       const double** x, const std::int64_t* incx, double** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<float>** a,
-                       const std::int64_t* lda, const std::complex<float>** x,
-                       const std::int64_t* incx, std::complex<float>** c, const std::int64_t* ldc,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<double>** a,
-                       const std::int64_t* lda, const std::complex<double>** x,
-                       const std::int64_t* incx, std::complex<double>** c, const std::int64_t* ldc,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                       value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                       std::int64_t stridea, const float* x, std::int64_t incx,
-                       std::int64_t stridex, value_or_pointer<float> beta, float* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                       value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                       std::int64_t stridea, const double* x, std::int64_t incx,
-                       std::int64_t stridex, value_or_pointer<double> beta, double* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                       value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                       std::int64_t lda, std::int64_t stridea, const std::complex<float>* x,
-                       std::int64_t incx, std::int64_t stridex,
-                       value_or_pointer<std::complex<float>> beta, std::complex<float>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                       value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                       std::int64_t lda, std::int64_t stridea, const std::complex<double>* x,
-                       std::int64_t incx, std::int64_t stridex,
-                       value_or_pointer<std::complex<double>> beta, std::complex<double>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                       const std::int64_t* n, const float* alpha, const float** a,
-                       const std::int64_t* lda, const float** x, const std::int64_t* incx,
-                       const float* beta, float** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                       const std::int64_t* n, const double* alpha, const double** a,
-                       const std::int64_t* lda, const double** x, const std::int64_t* incx,
-                       const double* beta, double** y, const std::int64_t* incy,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<float>* alpha,
-                       const std::complex<float>** a, const std::int64_t* lda,
-                       const std::complex<float>** x, const std::int64_t* incx,
-                       const std::complex<float>* beta, std::complex<float>** y,
-                       const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<double>* alpha,
-                       const std::complex<double>** a, const std::int64_t* lda,
-                       const std::complex<double>** x, const std::int64_t* incx,
-                       const std::complex<double>* beta, std::complex<double>** y,
-                       const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const double* alpha,
-                       const double** x, const std::int64_t* incx, double** y,
-                       const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const float* alpha,
-                       const float** x, const std::int64_t* incx, float** y,
-                       const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex<double>* alpha,
-                       const std::complex<double>** x, const std::int64_t* incx,
-                       std::complex<double>** y, const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex<float>* alpha,
-                       const std::complex<float>** x, const std::int64_t* incx,
-                       std::complex<float>** y, const std::int64_t* incy, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer<float> alpha,
-                       const float* x, std::int64_t incx, std::int64_t stridex, float* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer<double> alpha,
-                       const double* x, std::int64_t incx, std::int64_t stridex, double* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, std::int64_t n,
-                       value_or_pointer<std::complex<float>> alpha, const std::complex<float>* x,
-                       std::int64_t incx, std::int64_t stridex, std::complex<float>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event axpy_batch(sycl::queue& queue, std::int64_t n,
-                       value_or_pointer<std::complex<double>> alpha, const std::complex<double>* x,
-                       std::int64_t incx, std::int64_t stridex, std::complex<double>* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const float** a, const std::int64_t* lda,
-                       const float** b, const std::int64_t* ldb, const float* beta, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const double* alpha, const double** a, const std::int64_t* lda,
-                       const double** b, const std::int64_t* ldb, const double* beta, double** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const std::complex<float>* alpha, const std::complex<float>** a,
-                       const std::int64_t* lda, const std::complex<float>** b,
-                       const std::int64_t* ldb, const std::complex<float>* beta,
-                       std::complex<float>** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const std::complex<double>* alpha, const std::complex<double>** a,
-                       const std::int64_t* lda, const std::complex<double>** b,
-                       const std::int64_t* ldb, const std::complex<double>* beta,
-                       std::complex<double>** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const sycl::half* alpha, const sycl::half** a, const std::int64_t* lda,
-                       const sycl::half** b, const std::int64_t* ldb, const sycl::half* beta,
-                       sycl::half** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const sycl::half** a, const std::int64_t* lda,
-                       const sycl::half** b, const std::int64_t* ldb, const float* beta, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const bfloat16** a, const std::int64_t* lda,
-                       const bfloat16** b, const std::int64_t* ldb, const float* beta, bfloat16** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const bfloat16** a, const std::int64_t* lda,
-                       const bfloat16** b, const std::int64_t* ldb, const float* beta, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const std::int8_t** a, const std::int64_t* lda,
-                       const std::int8_t** b, const std::int64_t* ldb, const float* beta,
-                       std::int32_t** c, const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb,
-                       const std::int64_t* m, const std::int64_t* n, const std::int64_t* k,
-                       const float* alpha, const std::int8_t** a, const std::int64_t* lda,
-                       const std::int8_t** b, const std::int64_t* ldb, const float* beta, float** c,
-                       const std::int64_t* ldc, std::int64_t group_count,
-                       const std::int64_t* groupsize,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const float* a, std::int64_t lda, std::int64_t stride_a, const float* b,
-                       std::int64_t ldb, std::int64_t stride_b, value_or_pointer<float> beta,
-                       float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<double> alpha,
-                       const double* a, std::int64_t lda, std::int64_t stride_a, const double* b,
-                       std::int64_t ldb, std::int64_t stride_b, value_or_pointer<double> beta,
-                       double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<std::complex<float>> alpha,
-                       const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                       const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<std::complex<float>> beta, std::complex<float>* c,
-                       std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<std::complex<double>> alpha,
-                       const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                       const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<std::complex<double>> beta, std::complex<double>* c,
-                       std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<sycl::half> alpha,
-                       const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
-                       const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<sycl::half> beta, sycl::half* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
-                       const sycl::half* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const bfloat16* a, std::int64_t lda, std::int64_t stride_a,
-                       const bfloat16* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<float> beta, bfloat16* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const bfloat16* a, std::int64_t lda, std::int64_t stride_a,
-                       const bfloat16* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
-                       const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<float> beta, std::int32_t* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                       std::int64_t n, std::int64_t k, value_or_pointer<float> alpha,
-                       const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
-                       const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b,
-                       value_or_pointer<float> beta, float* c, std::int64_t ldc,
-                       std::int64_t stride_c, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                       diag unit_diag, std::int64_t m, std::int64_t n,
-                       value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                       std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                       diag unit_diag, std::int64_t m, std::int64_t n,
-                       value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                       std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
-                       std::int64_t batch_size, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                       diag unit_diag, std::int64_t m, std::int64_t n,
-                       value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                       std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
-                       std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
-                       diag unit_diag, std::int64_t m, std::int64_t n,
-                       value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                       std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
-                       std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower,
-                       const transpose* trans, const diag* unit_diag, const std::int64_t* m,
-                       const std::int64_t* n, const float* alpha, const float** a,
-                       const std::int64_t* lda, float** b, const std::int64_t* ldb,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower,
-                       const transpose* trans, const diag* unit_diag, const std::int64_t* m,
-                       const std::int64_t* n, const double* alpha, const double** a,
-                       const std::int64_t* lda, double** b, const std::int64_t* ldb,
-                       std::int64_t group_count, const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower,
-                       const transpose* trans, const diag* unit_diag, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<float>* alpha,
-                       const std::complex<float>** a, const std::int64_t* lda,
-                       std::complex<float>** b, const std::int64_t* ldb, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower,
-                       const transpose* trans, const diag* unit_diag, const std::int64_t* m,
-                       const std::int64_t* n, const std::complex<double>* alpha,
-                       const std::complex<double>** a, const std::int64_t* lda,
-                       std::complex<double>** b, const std::int64_t* ldb, std::int64_t group_count,
-                       const std::int64_t* group_size,
-                       const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                           std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
-                           std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                           std::int64_t stride_a, double* b, std::int64_t ldb,
-                           std::int64_t stride_b, std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<std::complex<float>> alpha,
-                           const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                           std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
-                           std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<std::complex<double>> alpha,
-                           const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                           std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-                           std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<float> alpha, float* ab, std::int64_t lda,
-                           std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<double> alpha, double* ab, std::int64_t lda,
-                           std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<std::complex<float>> alpha, std::complex<float>* ab,
-                           std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                           std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                           value_or_pointer<std::complex<double>> alpha, std::complex<double>* ab,
-                           std::int64_t lda, std::int64_t ldb, std::int64_t stride,
-                           std::int64_t batch_size,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                          std::int64_t n, value_or_pointer<float> alpha, const float* a,
-                          std::int64_t lda, std::int64_t stride_a, value_or_pointer<float> beta,
-                          const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
-                          std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                          const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                          std::int64_t n, value_or_pointer<double> alpha, const double* a,
-                          std::int64_t lda, std::int64_t stride_a, value_or_pointer<double> beta,
-                          const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
-                          std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                          const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                          std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                          const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                          value_or_pointer<std::complex<float>> beta, const std::complex<float>* b,
-                          std::int64_t ldb, std::int64_t stride_b, std::complex<float>* c,
-                          std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
-                          const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                          std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                          const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                          value_or_pointer<std::complex<double>> beta,
-                          const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-                          std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
-                          std::int64_t batch_size,
-                          const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<float> alpha, const float* a, std::int64_t lda, float* b,
-                     std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<double> alpha, const double* a, std::int64_t lda, double* b,
-                     std::int64_t ldb, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                     std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                     std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                      value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                      std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                      value_or_pointer<double> alpha, const double* a, std::int64_t lda,
-                      std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                      value_or_pointer<std::complex<float>> alpha, const std::complex<float>* a,
-                      std::int64_t lda, std::int64_t stridea, std::complex<float>* b,
-                      std::int64_t ldb, std::int64_t strideb,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                      value_or_pointer<std::complex<double>> alpha, const std::complex<double>* a,
-                      std::int64_t lda, std::int64_t stridea, std::complex<double>* b,
-                      std::int64_t ldb, std::int64_t strideb,
-                      const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<float> alpha, float* ab, std::int64_t lda, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<double> alpha, double* ab, std::int64_t lda, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<std::complex<float>> alpha, std::complex<float>* ab,
-                     std::int64_t lda, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
-                     value_or_pointer<std::complex<double>> alpha, std::complex<double>* ab,
-                     std::int64_t lda, std::int64_t ldb,
-                     const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                    std::int64_t n, value_or_pointer<float> alpha, const float* a, std::int64_t lda,
-                    value_or_pointer<float> beta, const float* b, std::int64_t ldb, float* c,
-                    std::int64_t ldc, const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                    std::int64_t n, value_or_pointer<double> alpha, const double* a,
-                    std::int64_t lda, value_or_pointer<double> beta, const double* b,
-                    std::int64_t ldb, double* c, std::int64_t ldc,
-                    const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                    std::int64_t n, value_or_pointer<std::complex<float>> alpha,
-                    const std::complex<float>* a, std::int64_t lda,
-                    value_or_pointer<std::complex<float>> beta, const std::complex<float>* b,
-                    std::int64_t ldb, std::complex<float>* c, std::int64_t ldc,
-                    const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
-                    std::int64_t n, value_or_pointer<std::complex<double>> alpha,
-                    const std::complex<double>* a, std::int64_t lda,
-                    value_or_pointer<std::complex<double>> beta, const std::complex<double>* b,
-                    std::int64_t ldb, std::complex<double>* c, std::int64_t ldc,
-                    const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const float* alpha, const float** a,
-                           const std::int64_t* lda, float** b, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const double* alpha, const double** a,
-                           const std::int64_t* lda, double** b, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const std::complex<float>* alpha,
-                           const std::complex<float>** a, const std::int64_t* lda,
-                           std::complex<float>** b, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const std::complex<double>* alpha,
-                           const std::complex<double>** a, const std::int64_t* lda,
-                           std::complex<double>** b, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const float* alpha, float** ab,
-                           const std::int64_t* lda, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const double* alpha, double** ab,
-                           const std::int64_t* lda, const std::int64_t* ldb,
-                           std::int64_t group_count, const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const std::complex<float>* alpha,
-                           std::complex<float>** ab, const std::int64_t* lda,
-                           const std::int64_t* ldb, std::int64_t group_count,
-                           const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
-
-sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m,
-                           const std::int64_t* n, const std::complex<double>* alpha,
-                           std::complex<double>** ab, const std::int64_t* lda,
-                           const std::int64_t* ldb, std::int64_t group_count,
-                           const std::int64_t* groupsize,
-                           const std::vector<sycl::event>& dependencies = {});
diff --git a/src/blas/backends/mkl_common/mkl_extensions.cxx b/src/blas/backends/mkl_common/mkl_extensions.cxx
index 171e2251a..bca426982 100644
--- a/src/blas/backends/mkl_common/mkl_extensions.cxx
+++ b/src/blas/backends/mkl_common/mkl_extensions.cxx
@@ -23,86 +23,102 @@ void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset of
                int64_t n, int64_t k, float alpha, sycl::buffer<int8_t, 1>& a, int64_t lda,
                int8_t ao, sycl::buffer<int8_t, 1>& b, int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo,
-                          beta, c, ldc, co);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co));
 }
 
 void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m,
                int64_t n, int64_t k, float alpha, sycl::buffer<int8_t, 1>& a, int64_t lda,
                int8_t ao, sycl::buffer<uint8_t, 1>& b, int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo,
-                          beta, c, ldc, co);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co));
 }
 
 void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m,
                int64_t n, int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a, int64_t lda,
                uint8_t ao, sycl::buffer<int8_t, 1>& b, int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo,
-                          beta, c, ldc, co);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co));
 }
 
 void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m,
                int64_t n, int64_t k, float alpha, sycl::buffer<uint8_t, 1>& a, int64_t lda,
                uint8_t ao, sycl::buffer<uint8_t, 1>& b, int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, int64_t ldc, sycl::buffer<int32_t, 1>& co) {
-    blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo,
-                          beta, c, ldc, co);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co));
 }
 
 void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n,
            int64_t k, float alpha, sycl::buffer<float, 1>& a, int64_t lda,
            sycl::buffer<float, 1>& b, int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            int64_t ldc) {
-    blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c,
-                      ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n,
            int64_t k, double alpha, sycl::buffer<double, 1>& a, int64_t lda,
            sycl::buffer<double, 1>& b, int64_t ldb, double beta, sycl::buffer<double, 1>& c,
            int64_t ldc) {
-    blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c,
-                      ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n,
            int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
            int64_t lda, sycl::buffer<std::complex<double>, 1>& b, int64_t ldb,
            std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c,
-                      ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n,
            int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
            int64_t lda, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
            std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c,
-                      ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc));
 }
 
 void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
               sycl::buffer<float, 1>& a, int64_t lda, sycl::buffer<float, 1>& b, int64_t ldb) {
-    blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, b, ldb));
 }
 
 void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
               sycl::buffer<double, 1>& a, int64_t lda, sycl::buffer<double, 1>& b, int64_t ldb) {
-    blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, b, ldb));
 }
 
 void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex<float> alpha,
               sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
               sycl::buffer<std::complex<float>, 1>& b, int64_t ldb) {
-    blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, b, ldb));
 }
 
 void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex<double> alpha,
               sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
               sycl::buffer<std::complex<double>, 1>& b, int64_t ldb) {
-    blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, b, ldb));
 }
 
 void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
@@ -132,48 +148,60 @@ void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
 
 void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
               sycl::buffer<float, 1>& ab, int64_t lda, int64_t ldb) {
-    blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, ab, lda, ldb));
 }
 
 void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
               sycl::buffer<double, 1>& ab, int64_t lda, int64_t ldb) {
-    blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, ab, lda, ldb));
 }
 
 void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex<float> alpha,
               sycl::buffer<std::complex<float>, 1>& ab, int64_t lda, int64_t ldb) {
-    blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, ab, lda, ldb));
 }
 
 void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex<double> alpha,
               sycl::buffer<std::complex<double>, 1>& ab, int64_t lda, int64_t ldb) {
-    blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, ab, lda, ldb));
 }
 
 void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
              float alpha, sycl::buffer<float, 1>& a, int64_t lda, float beta,
              sycl::buffer<float, 1>& b, int64_t ldb, sycl::buffer<float, 1>& c, int64_t ldc) {
-    blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa),
+                                                  detail::get_onemkl_transpose(transb), m, n, alpha,
+                                                  a, lda, beta, b, ldb, c, ldc));
 }
 
 void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
              double alpha, sycl::buffer<double, 1>& a, int64_t lda, double beta,
              sycl::buffer<double, 1>& b, int64_t ldb, sycl::buffer<double, 1>& c, int64_t ldc) {
-    blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa),
+                                                  detail::get_onemkl_transpose(transb), m, n, alpha,
+                                                  a, lda, beta, b, ldb, c, ldc));
 }
 
 void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
              std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, int64_t lda,
              std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b, int64_t ldb,
              sycl::buffer<std::complex<float>, 1>& c, int64_t ldc) {
-    blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa),
+                                                  detail::get_onemkl_transpose(transb), m, n, alpha,
+                                                  a, lda, beta, b, ldb, c, ldc));
 }
 
 void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
              std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, int64_t lda,
              std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& b, int64_t ldb,
              sycl::buffer<std::complex<double>, 1>& c, int64_t ldc) {
-    blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa),
+                                                  detail::get_onemkl_transpose(transb), m, n, alpha,
+                                                  a, lda, beta, b, ldb, c, ldc));
 }
 
 // USM APIs
@@ -183,8 +211,10 @@ sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, of
                       int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c,
                       int64_t ldc, const int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb,
-                                 bo, beta, c, ldc, co, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co, dependencies));
 }
 
 sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
@@ -192,8 +222,10 @@ sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, of
                       int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c,
                       int64_t ldc, const int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb,
-                                 bo, beta, c, ldc, co, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co, dependencies));
 }
 
 sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
@@ -201,8 +233,10 @@ sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, of
                       uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c,
                       int64_t ldc, const int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb,
-                                 bo, beta, c, ldc, co, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co, dependencies));
 }
 
 sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc,
@@ -210,24 +244,30 @@ sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, of
                       uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c,
                       int64_t ldc, const int32_t* co,
                       const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb,
-                                 bo, beta, c, ldc, co, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb),
+        detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc,
+        co, dependencies));
 }
 
 sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
                   int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b,
                   int64_t ldb, float beta, float* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta,
-                             c, ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+        dependencies));
 }
 
 sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
                   int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b,
                   int64_t ldb, double beta, double* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta,
-                             c, ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+        dependencies));
 }
 
 sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
@@ -235,8 +275,10 @@ sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transp
                   int64_t lda, const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
                   std::complex<float>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta,
-                             c, ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+        dependencies));
 }
 
 sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb,
@@ -244,34 +286,40 @@ sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transp
                   int64_t lda, const std::complex<double>* b, int64_t ldb,
                   std::complex<double> beta, std::complex<double>* c, int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta,
-                             c, ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+        detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc,
+        dependencies));
 }
 
 sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                      const float* a, int64_t lda, float* b, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, a, lda, b, ldb, dependencies));
 }
 
 sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                      const double* a, int64_t lda, double* b, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, a, lda, b, ldb, dependencies));
 }
 
 sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                      std::complex<float> alpha, const std::complex<float>* a, int64_t lda,
                      std::complex<float>* b, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, a, lda, b, ldb, dependencies));
 }
 
 sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                      std::complex<double> alpha, const std::complex<double>* a, int64_t lda,
                      std::complex<double>* b, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, a, lda, b, ldb, dependencies));
 }
 
 sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
@@ -303,41 +351,47 @@ sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
 sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha,
                      float* ab, int64_t lda, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, ab, lda, ldb, dependencies));
 }
 
 sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha,
                      double* ab, int64_t lda, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, ab, lda, ldb, dependencies));
 }
 
 sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                      std::complex<float> alpha, std::complex<float>* ab, int64_t lda, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, ab, lda, ldb, dependencies));
 }
 
 sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                      std::complex<double> alpha, std::complex<double>* ab, int64_t lda, int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
-    return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans),
+                                                       m, n, alpha, ab, lda, ldb, dependencies));
 }
 
 sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                     float alpha, const float* a, int64_t lda, float beta, const float* b,
                     int64_t ldb, float* c, int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc,
-                               dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, beta, b, ldb, c, ldc, dependencies));
 }
 
 sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
                     double alpha, const double* a, int64_t lda, double beta, const double* b,
                     int64_t ldb, double* c, int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc,
-                               dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, beta, b, ldb, c, ldc, dependencies));
 }
 
 sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -345,8 +399,9 @@ sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int6
                     std::complex<float> beta, const std::complex<float>* b, int64_t ldb,
                     std::complex<float>* c, int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc,
-                               dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, beta, b, ldb, c, ldc, dependencies));
 }
 
 sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n,
@@ -354,6 +409,7 @@ sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int6
                     std::complex<double> beta, const std::complex<double>* b, int64_t ldb,
                     std::complex<double>* c, int64_t ldc,
                     const std::vector<sycl::event>& dependencies) {
-    return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc,
-                               dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n,
+        alpha, a, lda, beta, b, ldb, c, ldc, dependencies));
 }
diff --git a/src/blas/backends/mkl_common/mkl_level1.cxx b/src/blas/backends/mkl_common/mkl_level1.cxx
index d109282d8..fac39ad36 100644
--- a/src/blas/backends/mkl_common/mkl_level1.cxx
+++ b/src/blas/backends/mkl_common/mkl_level1.cxx
@@ -21,625 +21,643 @@
 
 void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    blas_major::asum(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result));
 }
 
 void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    blas_major::asum(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result));
 }
 
 void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    blas_major::asum(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result));
 }
 
 void asum(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    blas_major::asum(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result));
 }
 
 void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::axpy(queue, n, alpha, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy));
 }
 
 void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::axpy(queue, n, alpha, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy));
 }
 
 void axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::axpy(queue, n, alpha, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy));
 }
 
 void axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::axpy(queue, n, alpha, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy));
 }
 
 void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
            std::int64_t incx, float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy));
 }
 
 void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
            std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy));
 }
 
 void axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy));
 }
 
 void axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy));
 }
 
 void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::copy(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy));
 }
 
 void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::copy(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy));
 }
 
 void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::copy(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy));
 }
 
 void copy(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::copy(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy));
 }
 
 void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
          sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& result) {
-    blas_major::dot(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result));
 }
 
 void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
          sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& result) {
-    blas_major::dot(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result));
 }
 
 void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer<float, 1>& x,
             std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
             sycl::buffer<float, 1>& result) {
-    blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result));
 }
 
 void dot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
          sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& result) {
-    blas_major::dot(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result));
 }
 
 void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    blas_major::dotc(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotc(queue, n, x, incx, y, incy, result));
 }
 
 void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    blas_major::dotc(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotc(queue, n, x, incx, y, incy, result));
 }
 
 void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
-    blas_major::dotu(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotu(queue, n, x, incx, y, incy, result));
 }
 
 void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
-    blas_major::dotu(queue, n, x, incx, y, incy, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotu(queue, n, x, incx, y, incy, result));
 }
 
 void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& result) {
-    blas_major::nrm2(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result));
 }
 
 void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& result) {
-    blas_major::nrm2(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result));
 }
 
 void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
-    blas_major::nrm2(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result));
 }
 
 void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
-    blas_major::nrm2(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result));
 }
 
 void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
          std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c,
          float s) {
-    blas_major::rot(queue, n, x, incx, y, incy, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s));
 }
 
 void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
          std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c,
          double s) {
-    blas_major::rot(queue, n, x, incx, y, incy, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s));
 }
 
 void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
          sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
-    blas_major::rot(queue, n, x, incx, y, incy, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s));
 }
 
 void rot(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
          sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
-    blas_major::rot(queue, n, x, incx, y, incy, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s));
 }
 
 void rotg(sycl::queue& queue, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& b,
           sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
-    blas_major::rotg(queue, a, b, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s));
 }
 
 void rotg(sycl::queue& queue, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& b,
           sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
-    blas_major::rotg(queue, a, b, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s));
 }
 
 void rotg(sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
-    blas_major::rotg(queue, a, b, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s));
 }
 
 void rotg(sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
-    blas_major::rotg(queue, a, b, c, s);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s));
 }
 
 void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& param) {
-    blas_major::rotm(queue, n, x, incx, y, incy, param);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotm(queue, n, x, incx, y, incy, param));
 }
 
 void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& y, std::int64_t incy, sycl::buffer<double, 1>& param) {
-    blas_major::rotm(queue, n, x, incx, y, incy, param);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotm(queue, n, x, incx, y, incy, param));
 }
 
 void rotmg(sycl::queue& queue, sycl::buffer<float, 1>& d1, sycl::buffer<float, 1>& d2,
            sycl::buffer<float, 1>& x1, float y1, sycl::buffer<float, 1>& param) {
-    blas_major::rotmg(queue, d1, d2, x1, y1, param);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotmg(queue, d1, d2, x1, y1, param));
 }
 
 void rotmg(sycl::queue& queue, sycl::buffer<double, 1>& d1, sycl::buffer<double, 1>& d2,
            sycl::buffer<double, 1>& x1, double y1, sycl::buffer<double, 1>& param) {
-    blas_major::rotmg(queue, d1, d2, x1, y1, param);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotmg(queue, d1, d2, x1, y1, param));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void scal(sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::scal(queue, n, alpha, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx));
 }
 
 void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::swap(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy));
 }
 
 void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::swap(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy));
 }
 
 void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::swap(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy));
 }
 
 void swap(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::swap(queue, n, x, incx, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy));
 }
 
 void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamax(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result));
 }
 
 void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamax(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result));
 }
 
 void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamax(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result));
 }
 
 void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamax(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result));
 }
 
 void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamin(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result));
 }
 
 void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamin(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result));
 }
 
 void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamin(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result));
 }
 
 void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>, 1>& x,
            std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
-    blas_major::iamin(queue, n, x, incx, result);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result));
 }
 
 // USM APIs
 
 sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::asum(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::asum(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                  float* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::asum(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
                  double* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::asum(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies));
 }
 
 sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies));
 }
 
 sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies));
 }
 
 sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies));
 }
 
 sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x,
                   std::int64_t incx, float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x,
                   std::int64_t incx, double beta, double* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
                   const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                   std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
                   const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                   std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                  std::int64_t incx, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                  std::int64_t incx, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::copy(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                 const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::dot(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dot(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
                 const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::dot(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dot(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx,
                    const float* y, std::int64_t incy, float* result,
                    const std::vector<sycl::event>& dependencies) {
-    return blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                 const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::dot(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dot(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                  std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
                  std::complex<float>* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                  std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
                  std::complex<double>* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                  std::int64_t incx, const std::complex<float>* y, std::int64_t incy,
                  std::complex<float>* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                  std::int64_t incx, const std::complex<double>* y, std::int64_t incy,
                  std::complex<double>* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies));
 }
 
 sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::nrm2(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::nrm2(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                  float* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::nrm2(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
                  double* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::nrm2(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies));
 }
 
 sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies));
 }
 
 sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
                 std::int64_t incy, float c, float s, const std::vector<sycl::event>& dependencies) {
-    return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies));
 }
 
 sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
                 std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies));
 }
 
 sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotg(queue, a, b, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies));
 }
 
 sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotg(queue, a, b, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies));
 }
 
 sycl::event rotg(sycl::queue& queue, std::complex<float>* a, std::complex<float>* b, float* c,
                  std::complex<float>* s, const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotg(queue, a, b, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies));
 }
 
 sycl::event rotg(sycl::queue& queue, std::complex<double>* a, std::complex<double>* b, double* c,
                  std::complex<double>* s, const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotg(queue, a, b, c, s, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies));
 }
 
 sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
                  std::int64_t incy, float* param, const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies));
 }
 
 sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
                  std::int64_t incy, double* param, const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies));
 }
 
 sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies));
 }
 
 sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex<double> alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex<float>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex<double>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::scal(queue, n, alpha, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies));
 }
 
 sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::swap(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::swap(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::swap(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::swap(queue, n, x, incx, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies));
 }
 
 sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                   std::int64_t* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamax(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
                   std::int64_t* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamax(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamax(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamax(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx,
                   std::int64_t* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamin(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx,
                   std::int64_t* result, const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamin(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<float>* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamin(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies));
 }
 
 sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex<double>* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::iamin(queue, n, x, incx, result, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies));
 }
diff --git a/src/blas/backends/mkl_common/mkl_level2.cxx b/src/blas/backends/mkl_common/mkl_level2.cxx
index 56fa591dc..0b0f40c6c 100644
--- a/src/blas/backends/mkl_common/mkl_level2.cxx
+++ b/src/blas/backends/mkl_common/mkl_level2.cxx
@@ -22,409 +22,493 @@
 void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                               alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                               alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                               alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                               alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
           std::int64_t ku, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl,
+                                               ku, alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
           std::int64_t ku, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl,
+                                               ku, alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
           std::int64_t ku, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl,
+                                               ku, alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl,
           std::int64_t ku, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl,
+                                               ku, alpha, a, lda, x, incx, beta, y, incy));
 }
 
 void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
-    blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha,
          sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda));
 }
 
 void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
-    blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
-    blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    blas_major::her(queue, uplo, n, alpha, x, incx, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda));
 }
 
 void her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    blas_major::her(queue, uplo, n, alpha, x, incx, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda));
 }
 
 void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
-    blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda));
 }
 
 void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
-    blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda));
 }
 
 void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
           std::int64_t incy) {
-    blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x,
+                                               incx, beta, y, incy));
 }
 
 void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx, std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
           std::int64_t incy) {
-    blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x,
+                                               incx, beta, y, incy));
 }
 
 void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
-    blas_major::hpr(queue, uplo, n, alpha, x, incx, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a));
 }
 
 void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
-    blas_major::hpr(queue, uplo, n, alpha, x, incx, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a));
 }
 
 void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
-    blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a));
 }
 
 void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
-    blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a));
 }
 
 void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
-    blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x,
+                                               incx, beta, y, incy));
 }
 
 void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
-    blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x,
+                                               incx, beta, y, incy));
 }
 
 void spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& a) {
-    blas_major::spr(queue, uplo, n, alpha, x, incx, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a));
 }
 
 void spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& a) {
-    blas_major::spr(queue, uplo, n, alpha, x, incx, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a));
 }
 
 void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& a) {
-    blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a));
 }
 
 void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& a) {
-    blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a));
 }
 
 void symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
-    blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
           sycl::buffer<double, 1>& y, std::int64_t incy) {
-    blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a,
+                                               lda, x, incx, beta, y, incy));
 }
 
 void syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& a, std::int64_t lda) {
-    blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda));
 }
 
 void syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
          std::int64_t incx, sycl::buffer<double, 1>& a, std::int64_t lda) {
-    blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda));
 }
 
 void syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
           std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
           sycl::buffer<float, 1>& a, std::int64_t lda) {
-    blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda));
 }
 
 void syr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
           std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
           sycl::buffer<double, 1>& a, std::int64_t lda) {
-    blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda));
 }
 
 void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx));
 }
 
 void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx) {
-    blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx) {
-    blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx) {
-    blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx) {
-    blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& a, sycl::buffer<std::complex<float>, 1>& x,
           std::int64_t incx) {
-    blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& a, sycl::buffer<std::complex<double>, 1>& x,
           std::int64_t incx) {
-    blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, x, incx));
 }
 
 void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
-    blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
-    blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
-    blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
-    blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo),
+                                               detail::get_onemkl_transpose(trans),
+                                               detail::get_onemkl_diag(diag), n, a, lda, x, incx));
 }
 
 // USM APIs
@@ -432,15 +516,17 @@ void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_
 sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
@@ -448,8 +534,9 @@ sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
@@ -457,24 +544,27 @@ sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
                  std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda,
                  const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   kl, ku, alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
                  std::int64_t kl, std::int64_t ku, double alpha, const double* a, std::int64_t lda,
                  const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   kl, ku, alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
@@ -482,8 +572,9 @@ sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* x,
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   kl, ku, alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n,
@@ -491,48 +582,55 @@ sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* x,
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n,
+                                                   kl, ku, alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k,
@@ -540,7 +638,9 @@ sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k,
@@ -548,315 +648,393 @@ sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* x,
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, lda, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* x,
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, lda, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha,
                 const std::complex<float>* x, std::int64_t incx, std::complex<float>* a,
                 std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    return blas_major::her(queue, uplo, n, alpha, x, incx, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x,
+                                                  incx, a, lda, dependencies));
 }
 
 sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha,
                 const std::complex<double>* x, std::int64_t incx, std::complex<double>* a,
                 std::int64_t lda, const std::vector<sycl::event>& dependencies) {
-    return blas_major::her(queue, uplo, n, alpha, x, incx, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x,
+                                                  incx, a, lda, dependencies));
 }
 
 sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha,
                 const std::complex<float>* x, std::int64_t incx, std::complex<float>* a,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpr(queue, uplo, n, alpha, x, incx, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies));
 }
 
 sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha,
                 const std::complex<double>* x, std::int64_t incx, std::complex<double>* a,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpr(queue, uplo, n, alpha, x, incx, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies));
 }
 
 sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, dependencies));
 }
 
 sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, dependencies));
 }
 
 sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha,
                  const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha,
                  const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k,
+                                                   alpha, a, lda, x, incx, beta, y, incy,
+                                                   dependencies));
 }
 
 sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a,
                  const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* a,
                  const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x,
                 std::int64_t incx, float* a, const std::vector<sycl::event>& dependencies) {
-    return blas_major::spr(queue, uplo, n, alpha, x, incx, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies));
 }
 
 sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x,
                 std::int64_t incx, double* a, const std::vector<sycl::event>& dependencies) {
-    return blas_major::spr(queue, uplo, n, alpha, x, incx, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies));
 }
 
 sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x,
                  std::int64_t incx, const float* y, std::int64_t incy, float* a,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, dependencies));
 }
 
 sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x,
                  std::int64_t incx, const double* y, std::int64_t incy, double* a,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, dependencies));
 }
 
 sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a,
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, lda, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* a,
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
-    return blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   a, lda, x, incx, beta, y, incy, dependencies));
 }
 
 sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x,
                 std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x,
+                                                  incx, a, lda, dependencies));
 }
 
 sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x,
                 std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x,
+                                                  incx, a, lda, dependencies));
 }
 
 sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x,
                  std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x,
                  std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha,
+                                                   x, incx, y, incy, a, lda, dependencies));
 }
 
 sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  std::int64_t k, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies));
 }
 
 sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<float>* a, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<double>* a, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<float>* a, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<double>* a, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, x, incx, dependencies));
 }
 
 sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const float* a, std::int64_t lda, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const double* a, std::int64_t lda, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const float* a, std::int64_t lda, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const double* a, std::int64_t lda, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
 
 sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+                         detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies));
 }
diff --git a/src/blas/backends/mkl_common/mkl_level3.cxx b/src/blas/backends/mkl_common/mkl_level3.cxx
index e67afc26d..e76dac543 100644
--- a/src/blas/backends/mkl_common/mkl_level3.cxx
+++ b/src/blas/backends/mkl_common/mkl_level3.cxx
@@ -23,225 +23,291 @@ void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& b, std::int64_t ldb, double beta, sycl::buffer<double, 1>& c,
           std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
           sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, sycl::half beta,
           sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
           sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a, std::int64_t lda,
           sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
-    blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa),
+                                               detail::get_onemkl_transpose(transb), m, n, k, alpha,
+                                               a, lda, b, ldb, beta, c, ldc));
 }
 
 void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right),
+                                               detail::get_onemkl_uplo(upper_lower), m, n, alpha, a,
+                                               lda, b, ldb, beta, c, ldc));
 }
 
 void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           float alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, float beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
           double alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, double beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower),
+                                               detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                               lda, beta, c, ldc));
 }
 
 void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
            std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
-    blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
-    blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
-    blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k,
            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
-    blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower),
+                                                detail::get_onemkl_transpose(trans), n, k, alpha, a,
+                                                lda, b, ldb, beta, c, ldc));
 }
 
 void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trmm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trmm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trmm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trmm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb) {
-    blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trsm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb) {
-    blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trsm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
-    blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trsm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
-    blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b,
-                     ldb);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        blas_major::trsm(queue, detail::get_onemkl_side(left_right),
+                         detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa),
+                         detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb));
 }
 
 // USM APIs
@@ -250,16 +316,18 @@ sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::in
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
@@ -267,8 +335,9 @@ sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::in
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
@@ -276,48 +345,54 @@ sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::in
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
                  std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a,
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta,
                  sycl::half* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
                  std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda,
                  const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m,
                  std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda,
                  const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(
+        queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* b,
                  std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b,
                  std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
@@ -325,8 +400,9 @@ sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int
                  std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
                  std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
@@ -334,8 +410,9 @@ sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int
                  std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
                  std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
@@ -343,8 +420,9 @@ sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int
                  std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
                  std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m,
@@ -352,70 +430,79 @@ sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int
                  std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
                  std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c,
-                            ldc, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta,
                  float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta,
                  double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float> beta, std::complex<float>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double> beta, std::complex<double>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, float alpha, const std::complex<float>* a, std::int64_t lda,
                  float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                  std::int64_t k, double alpha, const std::complex<double>* a, std::int64_t lda,
                  double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc,
-                            dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower),
+                                                   detail::get_onemkl_transpose(trans), n, k, alpha,
+                                                   a, lda, beta, c, ldc, dependencies));
 }
 
 sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                   std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b,
                   std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
                   std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b,
                   std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
@@ -423,8 +510,9 @@ sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::in
                   std::int64_t lda, const std::complex<float>* b, std::int64_t ldb,
                   std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
@@ -432,8 +520,9 @@ sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::in
                   std::int64_t lda, const std::complex<double>* b, std::int64_t ldb,
                   std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
@@ -441,8 +530,9 @@ sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::in
                   std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, float beta,
                   std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n,
@@ -450,70 +540,87 @@ sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::in
                   std::int64_t lda, const std::complex<double>* b, std::int64_t ldb, double beta,
                   std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
-    return blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc,
-                             dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k(
+        queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k,
+        alpha, a, lda, b, ldb, beta, c, ldc, dependencies));
 }
 
 sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a,
                  std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a,
                  std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a,
                  std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a,
                  std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
 
 sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa,
                  diag unit_diag, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
-    return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda,
-                            b, ldb, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(
+        queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower),
+        detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a,
+        lda, b, ldb, dependencies));
 }
diff --git a/src/blas/backends/mklcpu/CMakeLists.txt b/src/blas/backends/mklcpu/CMakeLists.txt
index 322741d26..592546ee8 100644
--- a/src/blas/backends/mklcpu/CMakeLists.txt
+++ b/src/blas/backends/mklcpu/CMakeLists.txt
@@ -17,15 +17,16 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_blas_mklcpu)
+set(LIB_NAME onemath_blas_mklcpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 set(SOURCES
   mklcpu_level1.cpp mklcpu_level2.cpp mklcpu_level3.cpp mklcpu_batch.cpp mklcpu_extensions.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mklcpu_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES})
 endif()
@@ -35,16 +36,17 @@ target_include_directories(${LIB_OBJ}
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::BLAS)
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_SYCL::BLAS)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_SYCL::BLAS)
 else()
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_DPCPP)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_DPCPP)
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
@@ -54,7 +56,7 @@ target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 #Set libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -67,8 +69,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/mklcpu/mklcpu_batch.cpp b/src/blas/backends/mklcpu/mklcpu_batch.cpp
index 5ecf4cc69..cda964f7d 100644
--- a/src/blas/backends/mklcpu/mklcpu_batch.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_batch.cpp
@@ -23,13 +23,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklcpu {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklcpu/mklcpu_extensions.cpp b/src/blas/backends/mklcpu/mklcpu_extensions.cpp
index 215addd5e..3ddf73c80 100644
--- a/src/blas/backends/mklcpu/mklcpu_extensions.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_extensions.cpp
@@ -23,13 +23,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklcpu {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklcpu/mklcpu_level1.cpp b/src/blas/backends/mklcpu/mklcpu_level1.cpp
index a4d786673..846cb6068 100644
--- a/src/blas/backends/mklcpu/mklcpu_level1.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_level1.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklcpu {
 namespace column_major {
@@ -45,5 +45,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklcpu/mklcpu_level2.cpp b/src/blas/backends/mklcpu/mklcpu_level2.cpp
index 7bd46078c..4b62d1bd0 100644
--- a/src/blas/backends/mklcpu/mklcpu_level2.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_level2.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklcpu {
 namespace column_major {
@@ -45,5 +45,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklcpu/mklcpu_level3.cpp b/src/blas/backends/mklcpu/mklcpu_level3.cpp
index 6433fc98b..4d4e5e915 100644
--- a/src/blas/backends/mklcpu/mklcpu_level3.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_level3.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklcpu {
 namespace column_major {
@@ -45,5 +45,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklcpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklcpu/mklcpu_wrappers.cpp b/src/blas/backends/mklcpu/mklcpu_wrappers.cpp
index 527f38a18..13f2d89fb 100644
--- a/src/blas/backends/mklcpu/mklcpu_wrappers.cpp
+++ b/src/blas/backends/mklcpu/mklcpu_wrappers.cpp
@@ -18,11 +18,11 @@
 *******************************************************************************/
 
 #include "blas/function_table.hpp"
-#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"
+#include "oneapi/math/blas/detail/mklcpu/onemath_blas_mklcpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT blas_function_table_t mkl_blas_table = {
+extern "C" ONEMATH_EXPORT blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
 #define BACKEND mklcpu
 #define MAJOR   column_major
diff --git a/src/blas/backends/mklgpu/CMakeLists.txt b/src/blas/backends/mklgpu/CMakeLists.txt
index c971d1afd..d15da3519 100644
--- a/src/blas/backends/mklgpu/CMakeLists.txt
+++ b/src/blas/backends/mklgpu/CMakeLists.txt
@@ -17,31 +17,33 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_blas_mklgpu)
+set(LIB_NAME onemath_blas_mklgpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   mklgpu_level1.cpp mklgpu_level2.cpp mklgpu_level3.cpp mklgpu_batch.cpp mklgpu_extensions.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mklgpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::BLAS)
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_SYCL::BLAS)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_SYCL::BLAS)
 else()
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_DPCPP)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_DPCPP)
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
@@ -51,7 +53,7 @@ target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 #Set libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -64,8 +66,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/mklgpu/mklgpu_batch.cpp b/src/blas/backends/mklgpu/mklgpu_batch.cpp
index bad2db82c..427f1d2f5 100644
--- a/src/blas/backends/mklgpu/mklgpu_batch.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_batch.cpp
@@ -23,13 +23,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklgpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklgpu/mklgpu_extensions.cpp b/src/blas/backends/mklgpu/mklgpu_extensions.cpp
index c4b1635c8..709a65b27 100644
--- a/src/blas/backends/mklgpu/mklgpu_extensions.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_extensions.cpp
@@ -23,13 +23,13 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklgpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklgpu/mklgpu_level1.cpp b/src/blas/backends/mklgpu/mklgpu_level1.cpp
index 9d853e23d..96e57ef45 100644
--- a/src/blas/backends/mklgpu/mklgpu_level1.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_level1.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklgpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklgpu/mklgpu_level2.cpp b/src/blas/backends/mklgpu/mklgpu_level2.cpp
index 2d3fc6b39..383f8e6fb 100644
--- a/src/blas/backends/mklgpu/mklgpu_level2.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_level2.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
@@ -45,5 +45,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklgpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklgpu/mklgpu_level3.cpp b/src/blas/backends/mklgpu/mklgpu_level3.cpp
index 6362c4eaa..1307e440a 100644
--- a/src/blas/backends/mklgpu/mklgpu_level3.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_level3.cpp
@@ -23,12 +23,12 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
 #include "../mkl_common/mkl_blas_backend.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace mklgpu {
 namespace column_major {
@@ -45,5 +45,5 @@ namespace blas_major = ::oneapi::mkl::blas::row_major;
 } // namespace row_major
 } // namespace mklgpu
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/mklgpu/mklgpu_wrappers.cpp b/src/blas/backends/mklgpu/mklgpu_wrappers.cpp
index 39b49a93a..c47019f8c 100644
--- a/src/blas/backends/mklgpu/mklgpu_wrappers.cpp
+++ b/src/blas/backends/mklgpu/mklgpu_wrappers.cpp
@@ -18,11 +18,11 @@
 *******************************************************************************/
 
 #include "blas/function_table.hpp"
-#include "oneapi/mkl/blas/detail/mklgpu/onemkl_blas_mklgpu.hpp"
+#include "oneapi/math/blas/detail/mklgpu/onemath_blas_mklgpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT blas_function_table_t mkl_blas_table = {
+extern "C" ONEMATH_EXPORT blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
 #define BACKEND mklgpu
 #define MAJOR   column_major
diff --git a/src/blas/backends/netlib/CMakeLists.txt b/src/blas/backends/netlib/CMakeLists.txt
index fd5275fc0..73b7f6ce5 100644
--- a/src/blas/backends/netlib/CMakeLists.txt
+++ b/src/blas/backends/netlib/CMakeLists.txt
@@ -17,7 +17,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_blas_netlib)
+set(LIB_NAME onemath_blas_netlib)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 # Add third-party library 
@@ -28,8 +28,9 @@ set(SOURCES netlib_common.hpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: netlib_wrappers.cpp>
 )
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES})
@@ -41,12 +42,12 @@ target_include_directories(${LIB_OBJ}
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
           ${NETLIB_INCLUDE}
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ${NETLIB_LINK})
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL ${NETLIB_LINK})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
@@ -55,7 +56,7 @@ target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -68,8 +69,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/netlib/netlib_batch.cpp b/src/blas/backends/netlib/netlib_batch.cpp
index 69197b09d..09bf4c171 100644
--- a/src/blas/backends/netlib/netlib_batch.cpp
+++ b/src/blas/backends/netlib/netlib_batch.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "netlib_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 namespace column_major {
@@ -47,5 +47,5 @@ namespace row_major {
 } // namespace row_major
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/netlib/netlib_common.hpp b/src/blas/backends/netlib/netlib_common.hpp
index 18c08221d..b1848d277 100644
--- a/src/blas/backends/netlib/netlib_common.hpp
+++ b/src/blas/backends/netlib/netlib_common.hpp
@@ -29,20 +29,20 @@
 
 #include "cblas.h"
 
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
+#include "oneapi/math/types.hpp"
 
 #define GET_MULTI_PTR template get_multi_ptr<sycl::access::decorated::yes>().get_raw()
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 
 typedef enum { CblasFixOffset = 101, CblasColOffset = 102, CblasRowOffset = 103 } CBLAS_OFFSET;
 
 /**
- * Helper methods for converting between onemkl types and their BLAS
+ * Helper methods for converting between onemath types and their CBLAS
  * equivalents.
  */
 
@@ -97,7 +97,7 @@ static inline void host_task(H& cgh, F f) {
 
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif //_NETLIB_COMMON_HPP_
diff --git a/src/blas/backends/netlib/netlib_extensions.cpp b/src/blas/backends/netlib/netlib_extensions.cpp
index 4815ba598..c062ba56e 100644
--- a/src/blas/backends/netlib/netlib_extensions.cpp
+++ b/src/blas/backends/netlib/netlib_extensions.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "netlib_common.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 namespace column_major {
@@ -47,5 +47,5 @@ namespace row_major {
 } // namespace row_major
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/netlib/netlib_level1.cpp b/src/blas/backends/netlib/netlib_level1.cpp
index 284adce75..aa55ec419 100644
--- a/src/blas/backends/netlib/netlib_level1.cpp
+++ b/src/blas/backends/netlib/netlib_level1.cpp
@@ -24,8 +24,8 @@
 #endif
 
 #include "netlib_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
 
 inline float abs_val(float val) {
     return std::abs(val);
@@ -222,7 +222,7 @@ void cblas_zrotg(std::complex<double>* ca, const std::complex<double>* cb, doubl
 }
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 namespace column_major {
@@ -241,5 +241,5 @@ namespace row_major {
 } // namespace row_major
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/netlib/netlib_level2.cpp b/src/blas/backends/netlib/netlib_level2.cpp
index fb63bf3a9..c33636564 100644
--- a/src/blas/backends/netlib/netlib_level2.cpp
+++ b/src/blas/backends/netlib/netlib_level2.cpp
@@ -24,10 +24,10 @@
 #endif
 
 #include "netlib_common.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 namespace column_major {
@@ -46,5 +46,5 @@ namespace row_major {
 } // namespace row_major
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/netlib/netlib_level3.cpp b/src/blas/backends/netlib/netlib_level3.cpp
index c41f78205..1d1fcc027 100644
--- a/src/blas/backends/netlib/netlib_level3.cpp
+++ b/src/blas/backends/netlib/netlib_level3.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "netlib_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace netlib {
 namespace column_major {
@@ -51,5 +51,5 @@ namespace row_major {
 } // namespace row_major
 } // namespace netlib
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/netlib/netlib_wrappers.cpp b/src/blas/backends/netlib/netlib_wrappers.cpp
index 1a377f647..291b8e4ed 100644
--- a/src/blas/backends/netlib/netlib_wrappers.cpp
+++ b/src/blas/backends/netlib/netlib_wrappers.cpp
@@ -18,11 +18,11 @@
 *******************************************************************************/
 
 #include "blas/function_table.hpp"
-#include "oneapi/mkl/blas/detail/netlib/onemkl_blas_netlib.hpp"
+#include "oneapi/math/blas/detail/netlib/onemath_blas_netlib.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT blas_function_table_t mkl_blas_table = {
+extern "C" ONEMATH_EXPORT blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
 #define BACKEND netlib
 #define MAJOR   column_major
diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt
index 3256554a3..f2c38f9b3 100644
--- a/src/blas/backends/portblas/CMakeLists.txt
+++ b/src/blas/backends/portblas/CMakeLists.txt
@@ -17,7 +17,7 @@
 #
 #=========================================================================
 
-set(LIB_NAME onemkl_blas_portblas)
+set(LIB_NAME onemath_blas_portblas)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 if(NOT DEFINED PORTBLAS_TUNING_TARGET)
@@ -36,10 +36,10 @@ endfunction(get_sycl_targets)
 # portBLAS supports tuning for some device types, but can only be compiled
 # for one at a time currently. Work out which device to tune for based on the
 # DPC++ target triple specified via -fsycl-targets
-if(TARGET ONEMKL::SYCL::SYCL)
-  get_target_property(ONEMKL_COMPILE_OPTIONS ONEMKL::SYCL::SYCL INTERFACE_COMPILE_OPTIONS)
+if(TARGET ONEMATH::SYCL::SYCL)
+  get_target_property(ONEMATH_COMPILE_OPTIONS ONEMATH::SYCL::SYCL INTERFACE_COMPILE_OPTIONS)
 endif()
-get_sycl_targets("${ONEMKL_COMPILE_OPTIONS}")
+get_sycl_targets("${ONEMATH_COMPILE_OPTIONS}")
 list(LENGTH SYCL_TARGETS NUM_TARGETS)
 if(NUM_TARGETS EQUAL 0)
   get_sycl_targets("${CMAKE_CXX_FLAGS}")
@@ -50,21 +50,21 @@ if(PORTBLAS_TUNING_TARGET)
   # Allow the user to manually enable a specific device type 
   # for tuned portBLAS configurations and sets sycl-target.
   if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU")
-    set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
+    set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
     set(PORTBLAS_TUNING_TARGET "")
-    target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+    target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
       -fsycl-targets=spir64_x86_64 -fsycl-unnamed-lambda)
-    target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+    target_link_options(ONEMATH::SYCL::SYCL INTERFACE
       -fsycl-targets=spir64_x86_64)
   elseif(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_GPU")
-    set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
+    set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
   elseif(PORTBLAS_TUNING_TARGET STREQUAL "AMD_GPU")
-    set(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
+    set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
     if (is_dpcpp)
-      target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+      target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda
         -Xsycl-target-backend --offload-arch=${HIP_TARGETS})
-      target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+      target_link_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${HIP_TARGETS})
     else()
       message(WARNING "Compiler is not supported."
@@ -72,16 +72,16 @@ if(PORTBLAS_TUNING_TARGET)
       " Compilation may fail.")
     endif()
   elseif(PORTBLAS_TUNING_TARGET STREQUAL "NVIDIA_GPU")
-    set(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
+    set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
     if (is_dpcpp)
-      target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+      target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
-      target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+      target_link_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=nvptx64-nvidia-cuda)
       if(DEFINED CUDA_TARGET)
-        target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+        target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
           -Xsycl-target-backend --cuda-gpu-arch=${CUDA_TARGET})
-        target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+        target_link_options(ONEMATH::SYCL::SYCL INTERFACE
           -Xsycl-target-backend --cuda-gpu-arch=${CUDA_TARGET})
       endif()
     else()
@@ -94,27 +94,27 @@ if(PORTBLAS_TUNING_TARGET)
   endif()
 elseif(NUM_TARGETS EQUAL 0)
   # Enable portBLAS backend for all devices types
-  set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
-  set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
-  set(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
-  set(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
+  set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
+  set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
+  set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
+  set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
 else()
   # Try to automatically detect the PORTBLAS_TUNING_TARGET
   foreach(SYCL_TARGET IN LISTS SYCL_TARGETS)
     if(SYCL_TARGETS MATCHES "^intel_gpu" OR SYCL_TARGETS MATCHES "^spir64_gen")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
       set(PORTBLAS_TUNING_TARGET "INTEL_GPU")
     elseif(SYCL_TARGETS MATCHES "^spir64_x86_64")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
     elseif(SYCL_TARGETS MATCHES "^spir64")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "")
       set(PORTBLAS_TUNING_TARGET "INTEL_GPU")
     elseif(SYCL_TARGETS MATCHES "^amd_gpu" OR SYCL_TARGETS MATCHES "-amd-")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "")
       set(PORTBLAS_TUNING_TARGET "AMD_GPU")
     elseif(SYCL_TARGETS MATCHES "^nvidia_gpu" OR SYCL_TARGETS MATCHES "-nvidia-")
-      set(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
+      set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "")
       set(PORTBLAS_TUNING_TARGET "NVIDIA_GPU")
     endif()
   endforeach()
@@ -135,7 +135,7 @@ else()
 endif()
 
 # If find_package doesn't work, download portBLAS from Github. This is
-# intended to make OneMKL easier to use.
+# intended to make oneMath easier to use.
 message(STATUS "Looking for portBLAS")
 find_package(PORTBLAS QUIET)
 if (NOT PORTBLAS_FOUND)
@@ -176,8 +176,9 @@ set(SOURCES
   portblas_batch.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: portblas_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES})
@@ -188,11 +189,11 @@ target_include_directories(${LIB_OBJ}
           ${PROJECT_SOURCE_DIR}/src/include
           ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL portblas)
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL portblas)
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON)
@@ -201,7 +202,7 @@ target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -214,8 +215,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/portblas/portblas_batch.cpp b/src/blas/backends/portblas/portblas_batch.cpp
index 65f0cd59e..d80207c8c 100644
--- a/src/blas/backends/portblas/portblas_batch.cpp
+++ b/src/blas/backends/portblas/portblas_batch.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 namespace column_major {
@@ -53,5 +53,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_batch.cxx b/src/blas/backends/portblas/portblas_batch.cxx
index 2fe63a127..75b1a115b 100644
--- a/src/blas/backends/portblas/portblas_batch.cxx
+++ b/src/blas/backends/portblas/portblas_batch.cxx
@@ -19,21 +19,21 @@
 
 // Buffer APIs
 
-void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
     throw unimplemented("blas", "syrk_batch", "");
 }
 
-void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
     throw unimplemented("blas", "syrk_batch", "");
 }
 
-void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
@@ -41,7 +41,7 @@ void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::
     throw unimplemented("blas", "syrk_batch", "");
 }
 
-void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
@@ -49,7 +49,7 @@ void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::
     throw unimplemented("blas", "syrk_batch", "");
 }
 
-void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                 float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex, float beta,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
@@ -57,7 +57,7 @@ void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m
     throw unimplemented("blas", "gemv_batch", "");
 }
 
-void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                 double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex, double beta,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
@@ -65,7 +65,7 @@ void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m
     throw unimplemented("blas", "gemv_batch", "");
 }
 
-void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
@@ -74,7 +74,7 @@ void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m
     throw unimplemented("blas", "gemv_batch", "");
 }
 
-void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
@@ -83,7 +83,7 @@ void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m
     throw unimplemented("blas", "gemv_batch", "");
 }
 
-void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -91,7 +91,7 @@ void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m
     throw unimplemented("blas", "dgmm_batch", "");
 }
 
-void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -99,7 +99,7 @@ void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m
     throw unimplemented("blas", "dgmm_batch", "");
 }
 
-void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -107,7 +107,7 @@ void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m
     throw unimplemented("blas", "dgmm_batch", "");
 }
 
-void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -166,7 +166,7 @@ void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<do
     throw unimplemented("blas", "copy_batch", "");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -176,7 +176,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
                      stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size);
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, double alpha,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
@@ -186,7 +186,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
                      stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size);
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -195,7 +195,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for complex");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -204,7 +204,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for complex");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -213,7 +213,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for complex");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -222,7 +222,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for unsupported dtype");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -231,7 +231,7 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for unsupported dtype");
 }
 
-void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -240,32 +240,32 @@ void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::
     throw unimplemented("blas", "gemm_batch", " for unsupported dtype");
 }
 
-void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                 std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                 std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                 std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("blas", "trsm_batch", "");
 }
 
-void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                 std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                 std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                 std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("blas", "trsm_batch", "");
 }
 
-void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                 std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("blas", "trsm_batch", "");
 }
 
-void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                 std::int64_t n, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -273,7 +273,7 @@ void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::u
     throw unimplemented("blas", "trsm_batch", "");
 }
 
-void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
@@ -281,7 +281,7 @@ void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64
                      stride_b, batch_size);
 }
 
-void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
                     std::int64_t stride_b, std::int64_t batch_size) {
@@ -289,7 +289,7 @@ void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64
                      stride_b, batch_size);
 }
 
-void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
@@ -297,7 +297,7 @@ void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64
     throw unimplemented("blas", "omatcopy_batch", "");
 }
 
-void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
@@ -305,63 +305,66 @@ void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64
     throw unimplemented("blas", "omatcopy_batch", "");
 }
 
-void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
     throw unimplemented("blas", "imatcopy_batch", "");
 }
 
-void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
                     std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
     throw unimplemented("blas", "imatcopy_batch", "");
 }
 
-void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     throw unimplemented("blas", "imatcopy_batch", "");
 }
 
-void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                     std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     throw unimplemented("blas", "imatcopy_batch", "");
 }
 
-void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                   std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
-                   std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b,
-                   std::int64_t ldb, std::int64_t stride_b, sycl::buffer<float, 1>& c,
-                   std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
+void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n, float alpha,
+                   sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a, float beta,
+                   sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
+                   sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                   std::int64_t batch_size) {
     CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a,
                      beta, b, ldb, stride_b, c, ldc, stride_c, batch_size);
 }
 
-void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                   std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
-                   std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b,
-                   std::int64_t ldb, std::int64_t stride_b, sycl::buffer<double, 1>& c,
-                   std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
+void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n, double alpha,
+                   sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a, double beta,
+                   sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
+                   sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                   std::int64_t batch_size) {
     CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a,
                      beta, b, ldb, stride_b, c, ldc, stride_c, batch_size);
 }
 
-void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                   std::int64_t m, std::int64_t n, std::complex<float> alpha,
-                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
-                   std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
-                   std::int64_t ldb, std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c,
-                   std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
+void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                   std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
+                   std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
+                   sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
+                   sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+                   std::int64_t batch_size) {
     throw unimplemented("blas", "omatadd_batch", "");
 }
 
-void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-                   std::int64_t m, std::int64_t n, std::complex<double> alpha,
-                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
-                   std::int64_t stride_a, std::complex<double> beta,
+void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                   oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
+                   std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
+                   std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
                    sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                    std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c,
                    std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
@@ -370,24 +373,24 @@ void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mk
 
 // USM APIs
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                       oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                       oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k,
                        float* alpha, const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                       oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                       oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k,
                        double* alpha, const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                       oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                       oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -395,8 +398,8 @@ sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
-                       oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower,
+                       oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -404,24 +407,24 @@ sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower,
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                       oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                       oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha,
                        const float* a, std::int64_t lda, std::int64_t stride_a, float beta,
                        float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                       oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                       oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha,
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                       oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                       oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                        std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                        std::int64_t stride_a, std::complex<float> beta, std::complex<float>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -429,8 +432,8 @@ sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                       oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                       oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                        std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                        std::int64_t stride_a, std::complex<double> beta, std::complex<double>* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -438,7 +441,7 @@ sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
     throw unimplemented("blas", "syrk_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
@@ -447,7 +450,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::in
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
@@ -456,7 +459,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::in
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                        std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<float>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
@@ -465,7 +468,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::in
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                        std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<double>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<double> beta,
@@ -474,7 +477,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::in
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m,
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda,
                        const float** x, std::int64_t* incx, float* beta, float** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
@@ -482,7 +485,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::i
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m,
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        const double** x, std::int64_t* incx, double* beta, double** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
@@ -490,7 +493,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::i
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m,
                        std::int64_t* n, std::complex<float>* alpha, const std::complex<float>** a,
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>* beta, std::complex<float>** y, std::int64_t* incy,
@@ -499,7 +502,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::i
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m,
+sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m,
                        std::int64_t* n, std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>* beta, std::complex<double>** y, std::int64_t* incy,
@@ -508,7 +511,7 @@ sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::i
     throw unimplemented("blas", "gemv_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
                        std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea,
                        const float* x, std::int64_t incx, std::int64_t stridex, float* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
@@ -516,7 +519,7 @@ sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::in
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
                        std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea,
                        const double* x, std::int64_t incx, std::int64_t stridex, double* c,
                        std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
@@ -524,7 +527,7 @@ sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::in
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
                        std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                        std::int64_t stridea, const std::complex<float>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<float>* c, std::int64_t ldc,
@@ -533,7 +536,7 @@ sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::in
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m,
                        std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                        std::int64_t stridea, const std::complex<double>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<double>* c, std::int64_t ldc,
@@ -542,21 +545,21 @@ sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::in
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m,
                        std::int64_t* n, const float** a, std::int64_t* lda, const float** x,
                        std::int64_t* incx, float** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m,
                        std::int64_t* n, const double** a, std::int64_t* lda, const double** x,
                        std::int64_t* incx, double** c, std::int64_t* ldc, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m,
                        std::int64_t* n, const std::complex<float>** a, std::int64_t* lda,
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -564,7 +567,7 @@ sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::i
     throw unimplemented("blas", "dgmm_batch", " for USM");
 }
 
-sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m,
+sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m,
                        std::int64_t* n, const std::complex<double>** a, std::int64_t* lda,
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -680,8 +683,8 @@ sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex<do
     throw unimplemented("blas", "copy_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, float* alpha, const float** a, std::int64_t* lda,
                        const float** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -689,8 +692,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, double* alpha, const double** a, std::int64_t* lda,
                        const double** b, std::int64_t* ldb, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -698,8 +701,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, std::complex<float>* alpha, const std::complex<float>** a,
                        std::int64_t* lda, const std::complex<float>** b, std::int64_t* ldb,
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
@@ -708,8 +711,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** b, std::int64_t* ldb,
                        std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
@@ -718,8 +721,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, sycl::half* alpha, const sycl::half** a, std::int64_t* lda,
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -727,8 +730,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, float* alpha, const sycl::half** a, std::int64_t* lda,
                        const sycl::half** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -736,8 +739,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -745,8 +748,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
-                       oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa,
+                       oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n,
                        std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -754,8 +757,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, const float* b, std::int64_t ldb,
                        std::int64_t stride_b, float beta, float* c, std::int64_t ldc,
@@ -766,8 +769,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
                          dependencies);
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, const double* b, std::int64_t ldb,
                        std::int64_t stride_b, double beta, double* c, std::int64_t ldc,
@@ -778,8 +781,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
                          dependencies);
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stride_a, const std::complex<float>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::complex<float> beta,
@@ -788,8 +791,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stride_a, const std::complex<double>* b,
                        std::int64_t ldb, std::int64_t stride_b, std::complex<double> beta,
@@ -798,8 +801,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, sycl::half alpha, const sycl::half* a, std::int64_t lda,
                        std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
                        std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
@@ -808,8 +811,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda,
                        std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
                        std::int64_t stride_b, float beta, float* c, std::int64_t ldc,
@@ -818,8 +821,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                        std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
                        std::int64_t stride_b, float beta, float* c, std::int64_t ldc,
@@ -828,8 +831,8 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                       oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                       oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                        std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                        std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
                        std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc,
@@ -838,27 +841,27 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                       oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                       oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                       oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                       oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                        const float* a, std::int64_t lda, std::int64_t stride_a, float* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                       oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                       oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                       oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                       oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                        const double* a, std::int64_t lda, std::int64_t stride_a, double* b,
                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                       oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                       oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                       oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                       oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                        std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                        std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb,
                        std::int64_t stride_b, std::int64_t batch_size,
@@ -866,9 +869,9 @@ sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
-                       oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                       oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right,
+                       oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                       oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                        std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                        std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb,
                        std::int64_t stride_b, std::int64_t batch_size,
@@ -876,36 +879,36 @@ sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right,
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                       oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                       oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, float* alpha,
-                       const float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
-                       std::int64_t group_count, std::int64_t* group_size,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                       oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
+                       oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n,
+                       float* alpha, const float** a, std::int64_t* lda, float** b,
+                       std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                       oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                       oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                       oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
+                       oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n,
                        double* alpha, const double** a, std::int64_t* lda, double** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                       oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                       oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                       oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
+                       oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                        std::int64_t* group_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
-                       oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
-                       oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n,
+sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right,
+                       oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
+                       oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -913,7 +916,7 @@ sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right,
     throw unimplemented("blas", "trsm_batch", " for USM");
 }
 
-sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, float alpha, const float* a, std::int64_t lda,
                            std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -921,7 +924,7 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std
                          ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, double alpha, const double* a, std::int64_t lda,
                            std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
@@ -930,7 +933,7 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std
                          ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                            std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
                            std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -938,7 +941,7 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std
     throw unimplemented("blas", "omatcopy_batch", " for USM");
 }
 
-sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, std::complex<double> alpha,
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -946,36 +949,36 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std
     throw unimplemented("blas", "omatcopy_batch", " for USM");
 }
 
-sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "imatcopy_batch", " for USM");
 }
 
-sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, double alpha, double* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "imatcopy_batch", " for USM");
 }
 
-sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, std::complex<float> alpha, std::complex<float>* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "imatcopy_batch", " for USM");
 }
 
-sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                            std::int64_t n, std::complex<double> alpha, std::complex<double>* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "imatcopy_batch", " for USM");
 }
 
-sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                          oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                          oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                           float alpha, const float* a, std::int64_t lda, std::int64_t stride_a,
                           float beta, const float* b, std::int64_t ldb, std::int64_t stride_b,
                           float* c, std::int64_t ldc, std::int64_t stride_c,
@@ -985,8 +988,8 @@ sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
                          dependencies);
 }
 
-sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                          oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                          oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                           double alpha, const double* a, std::int64_t lda, std::int64_t stride_a,
                           double beta, const double* b, std::int64_t ldb, std::int64_t stride_b,
                           double* c, std::int64_t ldc, std::int64_t stride_c,
@@ -996,8 +999,8 @@ sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
                          dependencies);
 }
 
-sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                          oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                          oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                           std::int64_t stride_a, std::complex<float> beta,
                           const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -1006,8 +1009,8 @@ sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "omatadd_batch", " for USM");
 }
 
-sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa,
-                          oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa,
+                          oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<double> alpha, const std::complex<double>* a,
                           std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
                           const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
diff --git a/src/blas/backends/portblas/portblas_common.hpp b/src/blas/backends/portblas/portblas_common.hpp
index 1624749e8..c2713c2f5 100644
--- a/src/blas/backends/portblas/portblas_common.hpp
+++ b/src/blas/backends/portblas/portblas_common.hpp
@@ -21,14 +21,14 @@
 #define _PORTBLAS_COMMON_HPP_
 
 #include "portblas.hpp"
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 #include <tuple>
 #include <utility>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -44,19 +44,19 @@ using buffer_iterator_t = ::blas::BufferIterator<ElemT>;
 template <typename ElemT>
 using sycl_complex_t = sycl::ext::oneapi::experimental::complex<ElemT>;
 
-/** A trait for obtaining equivalent portBLAS API types from oneMKL API
+/** A trait for obtaining equivalent portBLAS API types from oneMath API
  *  types.
  * 
- *  @tparam InputT is the oneMKL type.
+ *  @tparam InputT is the oneMath type.
  *  portblas_type<InputT>::type should be the equivalent portBLAS type.
 **/
 template <typename InputT>
 struct portblas_type;
 
-#define DEF_PORTBLAS_TYPE(onemkl_t, portblas_t) \
-    template <>                                 \
-    struct portblas_type<onemkl_t> {            \
-        using type = portblas_t;                \
+#define DEF_PORTBLAS_TYPE(onemath_t, portblas_t) \
+    template <>                                  \
+    struct portblas_type<onemath_t> {            \
+        using type = portblas_t;                 \
     };
 
 DEF_PORTBLAS_TYPE(sycl::queue, handle_t)
@@ -64,10 +64,10 @@ DEF_PORTBLAS_TYPE(int64_t, int64_t)
 DEF_PORTBLAS_TYPE(sycl::half, sycl::half)
 DEF_PORTBLAS_TYPE(float, float)
 DEF_PORTBLAS_TYPE(double, double)
-DEF_PORTBLAS_TYPE(oneapi::mkl::transpose, char)
-DEF_PORTBLAS_TYPE(oneapi::mkl::uplo, char)
-DEF_PORTBLAS_TYPE(oneapi::mkl::side, char)
-DEF_PORTBLAS_TYPE(oneapi::mkl::diag, char)
+DEF_PORTBLAS_TYPE(oneapi::math::transpose, char)
+DEF_PORTBLAS_TYPE(oneapi::math::uplo, char)
+DEF_PORTBLAS_TYPE(oneapi::math::side, char)
+DEF_PORTBLAS_TYPE(oneapi::math::diag, char)
 DEF_PORTBLAS_TYPE(std::complex<float>, sycl_complex_t<float>)
 DEF_PORTBLAS_TYPE(std::complex<double>, sycl_complex_t<double>)
 // Passthrough of portBLAS arg types for more complex wrapping.
@@ -101,10 +101,10 @@ struct portblas_type<std::vector<sycl::event>> {
     using type = std::vector<sycl::event>;
 };
 
-/** Convert a OneMKL argument to the type required for portBLAS.
+/** Convert a oneMath argument to the type required for portBLAS.
  *  
- *  @tparam InputT The OneMKL type.
- *  @param input The value of the oneMKL type.
+ *  @tparam InputT The oneMath type.
+ *  @param input The value of the oneMath type.
  *  @return The portBLAS value with appropriate type.
 **/
 template <typename InputT>
@@ -113,21 +113,21 @@ inline typename portblas_type<InputT>::type convert_to_portblas_type(InputT& inp
 }
 
 template <>
-inline char convert_to_portblas_type<oneapi::mkl::transpose>(oneapi::mkl::transpose& trans) {
-    if (trans == oneapi::mkl::transpose::nontrans) {
+inline char convert_to_portblas_type<oneapi::math::transpose>(oneapi::math::transpose& trans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         return 'n';
     }
-    else if (trans == oneapi::mkl::transpose::trans) {
+    else if (trans == oneapi::math::transpose::trans) {
         return 't';
     }
-    else { // trans == oneapi::mkl::transpose::conjtrans
+    else { // trans == oneapi::math::transpose::conjtrans
         return 'c';
     }
 }
 
 template <>
-inline char convert_to_portblas_type<oneapi::mkl::uplo>(oneapi::mkl::uplo& upper_lower) {
-    if (upper_lower == oneapi::mkl::uplo::upper) {
+inline char convert_to_portblas_type<oneapi::math::uplo>(oneapi::math::uplo& upper_lower) {
+    if (upper_lower == oneapi::math::uplo::upper) {
         return 'u';
     }
     else {
@@ -136,8 +136,8 @@ inline char convert_to_portblas_type<oneapi::mkl::uplo>(oneapi::mkl::uplo& upper
 }
 
 template <>
-inline char convert_to_portblas_type<oneapi::mkl::side>(oneapi::mkl::side& left_right) {
-    if (left_right == oneapi::mkl::side::left) {
+inline char convert_to_portblas_type<oneapi::math::side>(oneapi::math::side& left_right) {
+    if (left_right == oneapi::math::side::left) {
         return 'l';
     }
     else {
@@ -146,8 +146,8 @@ inline char convert_to_portblas_type<oneapi::mkl::side>(oneapi::mkl::side& left_
 }
 
 template <>
-inline char convert_to_portblas_type<oneapi::mkl::diag>(oneapi::mkl::diag& unit_diag) {
-    if (unit_diag == oneapi::mkl::diag::unit) {
+inline char convert_to_portblas_type<oneapi::math::diag>(oneapi::math::diag& unit_diag) {
+    if (unit_diag == oneapi::math::diag::unit) {
         return 'u';
     }
     else {
@@ -180,8 +180,8 @@ struct throw_if_unsupported_by_device {
         static constexpr bool checkTypeInPack = (std::is_same_v<CheckT, ArgTs> || ...);
         if (checkTypeInPack) {
             if (!q.get_info<sycl::info::queue::device>().has(AspectVal)) {
-                throw mkl::unsupported_device("blas", message,
-                                              q.get_info<sycl::info::queue::device>());
+                throw math::unsupported_device("blas", message,
+                                               q.get_info<sycl::info::queue::device>());
             }
         }
     }
@@ -233,7 +233,7 @@ struct throw_if_unsupported_by_device {
 
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // _PORTBLAS_COMMON_HPP_
diff --git a/src/blas/backends/portblas/portblas_gemm_bias.cxx b/src/blas/backends/portblas/portblas_gemm_bias.cxx
index 0b62ee674..83e39b2d9 100644
--- a/src/blas/backends/portblas/portblas_gemm_bias.cxx
+++ b/src/blas/backends/portblas/portblas_gemm_bias.cxx
@@ -19,32 +19,32 @@
 
 // Buffer APIs
 
-void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-               oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
+void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+               oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
                float alpha, sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
     throw unimplemented("blas", "gemm_bias", "");
 }
 
-void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-               oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
+void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+               oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
                float alpha, sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
     throw unimplemented("blas", "gemm_bias", "");
 }
 
-void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-               oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
+void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+               oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
                float alpha, sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
     throw unimplemented("blas", "gemm_bias", "");
 }
 
-void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-               oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
+void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+               oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k,
                float alpha, sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
                sycl::buffer<int32_t, 1>& c, std::int64_t ldc, sycl::buffer<int32_t, 1>& co) {
@@ -53,8 +53,8 @@ void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::t
 
 // USM APIs
 
-sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                      oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m,
+sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                      oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m,
                       std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
                       std::int64_t lda, std::int8_t ao, const std::uint8_t* b, std::int64_t ldb,
                       std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
@@ -62,8 +62,8 @@ sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_bias", " for USM");
 }
 
-sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                      oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m,
+sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                      oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m,
                       std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
                       std::int64_t lda, std::int8_t ao, const std::int8_t* b, std::int64_t ldb,
                       std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
@@ -71,8 +71,8 @@ sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_bias", " for USM");
 }
 
-sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                      oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m,
+sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                      oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m,
                       std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a,
                       std::int64_t lda, std::uint8_t ao, const std::int8_t* b, std::int64_t ldb,
                       std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
@@ -80,8 +80,8 @@ sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
     throw unimplemented("blas", "gemm_bias", " for USM");
 }
 
-sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa,
-                      oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m,
+sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa,
+                      oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m,
                       std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a,
                       std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb,
                       std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
diff --git a/src/blas/backends/portblas/portblas_level1_double.cpp b/src/blas/backends/portblas/portblas_level1_double.cpp
index 4c99f98c6..172fcfa8d 100644
--- a/src/blas/backends/portblas/portblas_level1_double.cpp
+++ b/src/blas/backends/portblas/portblas_level1_double.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -58,5 +58,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level1_float.cpp b/src/blas/backends/portblas/portblas_level1_float.cpp
index 744729f1a..b4a54375d 100644
--- a/src/blas/backends/portblas/portblas_level1_float.cpp
+++ b/src/blas/backends/portblas/portblas_level1_float.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -56,5 +56,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level2.cxx b/src/blas/backends/portblas/portblas_level2.cxx
index a99077a51..e1bc4766a 100644
--- a/src/blas/backends/portblas/portblas_level2.cxx
+++ b/src/blas/backends/portblas/portblas_level2.cxx
@@ -19,20 +19,20 @@
 
 // Buffer APIs
 
-void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
           real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& x,
           std::int64_t incx, real_t beta, sycl::buffer<real_t, 1>& y, std::int64_t incy) {
     CALL_PORTBLAS_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);
 }
 
-void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy) {
     throw unimplemented("blas", "gemv", " for complex");
 }
 
-void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
           std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, sycl::buffer<real_t, 1>& x, std::int64_t incx, real_t beta,
           sycl::buffer<real_t, 1>& y, std::int64_t incy) {
@@ -40,7 +40,7 @@ void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std:
                      incy);
 }
 
-void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
           std::int64_t kl, std::int64_t ku, std::complex<real_t> alpha,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx, std::complex<real_t> beta,
@@ -68,172 +68,172 @@ void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<real_
     throw unimplemented("blas", "geru", "");
 }
 
-void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy) {
     throw unimplemented("blas", "hbmv", "");
 }
 
-void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy) {
     throw unimplemented("blas", "hemv", "");
 }
 
-void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
          sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda) {
     throw unimplemented("blas", "her", "");
 }
 
-void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda) {
     throw unimplemented("blas", "her2", "");
 }
 
-void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy) {
     throw unimplemented("blas", "hpmv", "");
 }
 
-void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
          sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<real_t>, 1>& a) {
     throw unimplemented("blas", "hpr", "");
 }
 
-void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
           std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<real_t>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<real_t>, 1>& a) {
     throw unimplemented("blas", "hpr2", "");
 }
 
-void sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
           real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& x,
           std::int64_t incx, real_t beta, sycl::buffer<real_t, 1>& y, std::int64_t incy) {
     CALL_PORTBLAS_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y,
                      incy);
 }
 
-void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
           sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& x,
           std::int64_t incx, real_t beta, sycl::buffer<real_t, 1>& y, std::int64_t incy) {
     CALL_PORTBLAS_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy);
 }
 
-void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
          sycl::buffer<real_t, 1>& x, std::int64_t incx, sycl::buffer<real_t, 1>& a,
          std::int64_t lda) {
     CALL_PORTBLAS_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda);
 }
 
-void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
           sycl::buffer<real_t, 1>& x, std::int64_t incx, sycl::buffer<real_t, 1>& y,
           std::int64_t incy, sycl::buffer<real_t, 1>& a, std::int64_t lda) {
     CALL_PORTBLAS_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda);
 }
 
-void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
           sycl::buffer<real_t, 1>& a, sycl::buffer<real_t, 1>& x, std::int64_t incx, real_t beta,
           sycl::buffer<real_t, 1>& y, std::int64_t incy) {
     CALL_PORTBLAS_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy);
 }
 
-void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
          sycl::buffer<real_t, 1>& x, std::int64_t incx, sycl::buffer<real_t, 1>& a) {
     CALL_PORTBLAS_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a);
 }
 
-void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
           sycl::buffer<real_t, 1>& x, std::int64_t incx, sycl::buffer<real_t, 1>& y,
           std::int64_t incy, sycl::buffer<real_t, 1>& a) {
     CALL_PORTBLAS_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a);
 }
 
-void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<real_t, 1>& a,
+void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx);
 }
 
-void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k,
+void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "tbmv", "");
 }
 
-void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<real_t, 1>& a,
+void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx);
 }
 
-void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k,
+void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "tbsv", "");
 }
 
-void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
+void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
           sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx);
 }
 
-void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
+void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "tpmv", "");
 }
 
-void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
+void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
           sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx);
 }
 
-void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
+void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
           sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "tpsv", "");
 }
 
-void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a, std::int64_t lda,
-          sycl::buffer<real_t, 1>& x, std::int64_t incx) {
+void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
+          std::int64_t lda, sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx);
 }
 
-void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
+void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "trmv", " for complex");
 }
 
-void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a, std::int64_t lda,
-          sycl::buffer<real_t, 1>& x, std::int64_t incx) {
+void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<real_t, 1>& a,
+          std::int64_t lda, sycl::buffer<real_t, 1>& x, std::int64_t incx) {
     CALL_PORTBLAS_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx);
 }
 
-void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-          oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
+void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+          oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer<std::complex<real_t>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<real_t>, 1>& x, std::int64_t incx) {
     throw unimplemented("blas", "trsv", "");
 }
 
 // USM APIs
 
-sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                  real_t alpha, const real_t* a, std::int64_t lda, const real_t* x,
                  std::int64_t incx, real_t beta, real_t* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -241,7 +241,7 @@ sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t
                          dependencies);
 }
 
-sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                  std::complex<real_t> alpha, const std::complex<real_t>* a, std::int64_t lda,
                  const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t> beta,
                  std::complex<real_t>* y, std::int64_t incy,
@@ -249,7 +249,7 @@ sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t
     throw unimplemented("blas", "gemv", " for USM");
 }
 
-sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                  std::int64_t kl, std::int64_t ku, real_t alpha, const real_t* a, std::int64_t lda,
                  const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -257,7 +257,7 @@ sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t
                          incy, dependencies);
 }
 
-sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                  std::int64_t kl, std::int64_t ku, std::complex<real_t> alpha,
                  const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* x,
                  std::int64_t incx, std::complex<real_t> beta, std::complex<real_t>* y,
@@ -285,7 +285,7 @@ sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::comple
     throw unimplemented("blas", "geru", " for USM");
 }
 
-sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
                  std::complex<real_t> alpha, const std::complex<real_t>* a, std::int64_t lda,
                  const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t> beta,
                  std::complex<real_t>* y, std::int64_t incy,
@@ -293,7 +293,7 @@ sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t
     throw unimplemented("blas", "hbmv", " for USM");
 }
 
-sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                  std::complex<real_t> alpha, const std::complex<real_t>* a, std::int64_t lda,
                  const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t> beta,
                  std::complex<real_t>* y, std::int64_t incy,
@@ -301,20 +301,20 @@ sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t
     throw unimplemented("blas", "hemv", " for USM");
 }
 
-sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                 const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t>* a,
                 std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "her", " for USM");
 }
 
-sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                  std::complex<real_t> alpha, const std::complex<real_t>* x, std::int64_t incx,
                  const std::complex<real_t>* y, std::int64_t incy, std::complex<real_t>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "her2", " for USM");
 }
 
-sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                  std::complex<real_t> alpha, const std::complex<real_t>* a,
                  const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t> beta,
                  std::complex<real_t>* y, std::int64_t incy,
@@ -322,20 +322,20 @@ sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t
     throw unimplemented("blas", "hpmv", " for USM");
 }
 
-sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                 const std::complex<real_t>* x, std::int64_t incx, std::complex<real_t>* a,
                 const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "hpr", " for USM");
 }
 
-sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                  std::complex<real_t> alpha, const std::complex<real_t>* x, std::int64_t incx,
                  const std::complex<real_t>* y, std::int64_t incy, std::complex<real_t>* a,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "hpr2", " for USM");
 }
 
-sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
                  real_t alpha, const real_t* a, std::int64_t lda, const real_t* x,
                  std::int64_t incx, real_t beta, real_t* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -343,127 +343,127 @@ sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t
                          incy, dependencies);
 }
 
-sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                  const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta,
                  real_t* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y,
                          incy, dependencies);
 }
 
-sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                 const real_t* x, std::int64_t incx, real_t* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                  const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda,
                          dependencies);
 }
 
-sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                  const real_t* a, const real_t* x, std::int64_t incx, real_t beta, real_t* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy,
                          dependencies);
 }
 
-sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                 const real_t* x, std::int64_t incx, real_t* a,
                 const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a, dependencies);
 }
 
-sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha,
+sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha,
                  const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a,
                  const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a,
                          dependencies);
 }
 
-sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a,
+sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a,
                  std::int64_t lda, real_t* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx,
                          dependencies);
 }
 
-sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k,
+sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k,
                  const std::complex<real_t>* a, std::int64_t lda, std::complex<real_t>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "tbmv", " for USM");
 }
 
-sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a,
+sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a,
                  std::int64_t lda, real_t* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx,
                          dependencies);
 }
 
-sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k,
+sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k,
                  const std::complex<real_t>* a, std::int64_t lda, std::complex<real_t>* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "tbsv", " for USM");
 }
 
-sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, real_t* x,
+sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx,
                          dependencies);
 }
 
-sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
+sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
                  std::complex<real_t>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "tpmv", " for USM");
 }
 
-sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, real_t* x,
+sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx,
                          dependencies);
 }
 
-sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
+sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
                  std::complex<real_t>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "tpsv", " for USM");
 }
 
-sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda,
+sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda,
                  real_t* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx,
                          dependencies);
 }
 
-sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
+sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
                  std::int64_t lda, std::complex<real_t>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trmv", " for USM");
 }
 
-sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda,
+sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda,
                  real_t* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx,
                          dependencies);
 }
 
-sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                 oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
+sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                 oneapi::math::diag unit_diag, std::int64_t n, const std::complex<real_t>* a,
                  std::int64_t lda, std::complex<real_t>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsv", " for USM");
diff --git a/src/blas/backends/portblas/portblas_level2_double.cpp b/src/blas/backends/portblas/portblas_level2_double.cpp
index 092aa0c59..5b26558f0 100644
--- a/src/blas/backends/portblas/portblas_level2_double.cpp
+++ b/src/blas/backends/portblas/portblas_level2_double.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -56,5 +56,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level2_float.cpp b/src/blas/backends/portblas/portblas_level2_float.cpp
index 7308c05da..87c5b7842 100644
--- a/src/blas/backends/portblas/portblas_level2_float.cpp
+++ b/src/blas/backends/portblas/portblas_level2_float.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -56,5 +56,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level3.cxx b/src/blas/backends/portblas/portblas_level3.cxx
index 57c6f25b1..d1aa32652 100644
--- a/src/blas/backends/portblas/portblas_level3.cxx
+++ b/src/blas/backends/portblas/portblas_level3.cxx
@@ -19,7 +19,7 @@
 
 // Buffer APIs
 
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, sycl::buffer<real_t, 1>& b, std::int64_t ldb, real_t beta,
           sycl::buffer<real_t, 1>& c, std::int64_t ldc) {
@@ -27,14 +27,14 @@ void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transp
                      ldc);
 }
 
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& c, std::int64_t ldc) {
     using sycl_complex_real_t = sycl::ext::oneapi::experimental::complex<real_t>;
-    if (transa == oneapi::mkl::transpose::conjtrans ||
-        transb == oneapi::mkl::transpose::conjtrans) {
+    if (transa == oneapi::math::transpose::conjtrans ||
+        transb == oneapi::math::transpose::conjtrans) {
         throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS");
     }
     // Intermediate buffers for conversion purposes as portBLAS expects sycl::complex instead of std::complex
@@ -63,7 +63,7 @@ void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transp
     queue.copy(out_pb_acc, out_acc);
 }
 
-void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
           std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, sycl::buffer<real_t, 1>& b, std::int64_t ldb, real_t beta,
           sycl::buffer<real_t, 1>& c, std::int64_t ldc) {
@@ -71,7 +71,7 @@ void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo up
                      beta, c, ldc);
 }
 
-void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<real_t> alpha,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, std::complex<real_t> beta,
@@ -79,7 +79,7 @@ void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo up
     throw unimplemented("blas", "symm", "");
 }
 
-void hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+void hemm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<real_t> alpha,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, std::complex<real_t> beta,
@@ -87,34 +87,34 @@ void hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo up
     throw unimplemented("blas", "hemm", "");
 }
 
-void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
           std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer<real_t, 1>& a,
           std::int64_t lda, real_t beta, sycl::buffer<real_t, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "syrk", "");
 }
 
-void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
           std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
           sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda, std::complex<real_t> beta,
           sycl::buffer<std::complex<real_t>, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "syrk", "");
 }
 
-void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void herk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
           std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer<std::complex<real_t>, 1>& a,
           std::int64_t lda, real_t beta, sycl::buffer<std::complex<real_t>, 1>& c,
           std::int64_t ldc) {
     throw unimplemented("blas", "herk", "");
 }
 
-void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer<real_t, 1>& a,
            std::int64_t lda, sycl::buffer<real_t, 1>& b, std::int64_t ldb, real_t beta,
            sycl::buffer<real_t, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "syr2k", "");
 }
 
-void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
            sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, std::complex<real_t> beta,
@@ -122,7 +122,7 @@ void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::trans
     throw unimplemented("blas", "syr2k", "");
 }
 
-void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
            sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, real_t beta,
@@ -130,44 +130,44 @@ void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::trans
     throw unimplemented("blas", "her2k", "");
 }
 
-void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-          oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-          real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& b,
-          std::int64_t ldb) {
+void trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+          oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
+          std::int64_t n, real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda,
+          sycl::buffer<real_t, 1>& b, std::int64_t ldb) {
     throw unimplemented("blas", "trmm", "");
 }
 
-void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-          oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-          std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb) {
+void trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+          oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
+          std::int64_t n, std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a,
+          std::int64_t lda, sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb) {
     throw unimplemented("blas", "trmm", "");
 }
 
-void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-          oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-          real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& b,
-          std::int64_t ldb) {
+void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+          oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
+          std::int64_t n, real_t alpha, sycl::buffer<real_t, 1>& a, std::int64_t lda,
+          sycl::buffer<real_t, 1>& b, std::int64_t ldb) {
     CALL_PORTBLAS_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha,
                      a, lda, b, ldb);
 }
 
-void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-          oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
-          std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
-          sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb) {
+void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+          oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
+          std::int64_t n, std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a,
+          std::int64_t lda, sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb) {
     throw unimplemented("blas", "trsm", " for complex");
 }
 
-void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-           oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, real_t alpha,
+void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+           oneapi::math::transpose transb, std::int64_t n, std::int64_t k, real_t alpha,
            sycl::buffer<real_t, 1>& a, std::int64_t lda, sycl::buffer<real_t, 1>& b,
            std::int64_t ldb, real_t beta, sycl::buffer<real_t, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "gemmt", "");
 }
 
-void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-           oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k,
+void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+           oneapi::math::transpose transb, std::int64_t n, std::int64_t k,
            std::complex<real_t> alpha, sycl::buffer<std::complex<real_t>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<real_t>, 1>& b, std::int64_t ldb, std::complex<real_t> beta,
            sycl::buffer<std::complex<real_t>, 1>& c, std::int64_t ldc) {
@@ -228,7 +228,7 @@ void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_
 
 // USM APIs
 
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t* a,
                  std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, real_t* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -236,20 +236,20 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
                          c, ldc, dependencies);
 }
 
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
                  const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* b,
                  std::int64_t ldb, std::complex<real_t> beta, std::complex<real_t>* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    if (transa == oneapi::mkl::transpose::conjtrans ||
-        transb == oneapi::mkl::transpose::conjtrans) {
+    if (transa == oneapi::math::transpose::conjtrans ||
+        transb == oneapi::math::transpose::conjtrans) {
         throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS");
     }
     CALL_PORTBLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta,
                          c, ldc, dependencies);
 }
 
-sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
                  std::int64_t m, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda,
                  const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -257,7 +257,7 @@ sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::
                          beta, c, ldc, dependencies);
 }
 
-sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<real_t> alpha,
                  const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* b,
                  std::int64_t ldb, std::complex<real_t> beta, std::complex<real_t>* c,
@@ -265,7 +265,7 @@ sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::
     throw unimplemented("blas", "symm", " for USM");
 }
 
-sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<real_t> alpha,
                  const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* b,
                  std::int64_t ldb, std::complex<real_t> beta, std::complex<real_t>* c,
@@ -273,14 +273,14 @@ sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::
     throw unimplemented("blas", "hemm", " for USM");
 }
 
-sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda,
                  real_t beta, real_t* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syrk", " for USM");
 }
 
-sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
                  const std::complex<real_t>* a, std::int64_t lda, std::complex<real_t> beta,
                  std::complex<real_t>* c, std::int64_t ldc,
@@ -288,21 +288,21 @@ sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl:
     throw unimplemented("blas", "syrk", " for USM");
 }
 
-sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t k, real_t alpha, const std::complex<real_t>* a,
                  std::int64_t lda, real_t beta, std::complex<real_t>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "herk", " for USM");
 }
 
-sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda,
                   const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "syr2k", " for USM");
 }
 
-sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
                   const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* b,
                   std::int64_t ldb, std::complex<real_t> beta, std::complex<real_t>* c,
@@ -310,7 +310,7 @@ sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl
     throw unimplemented("blas", "syr2k", " for USM");
 }
 
-sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t k, std::complex<real_t> alpha,
                   const std::complex<real_t>* a, std::int64_t lda, const std::complex<real_t>* b,
                   std::int64_t ldb, real_t beta, std::complex<real_t>* c, std::int64_t ldc,
@@ -318,49 +318,50 @@ sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl
     throw unimplemented("blas", "her2k", " for USM");
 }
 
-sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                 oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                  std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trmm", " for USM");
 }
 
-sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                 oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                  std::int64_t n, std::complex<real_t> alpha, const std::complex<real_t>* a,
                  std::int64_t lda, std::complex<real_t>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trmm", " for USM");
 }
 
-sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                 oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                  std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b,
                  std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     CALL_PORTBLAS_USM_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n,
                          alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m,
+sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+                 oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m,
                  std::int64_t n, std::complex<real_t> alpha, const std::complex<real_t>* a,
                  std::int64_t lda, std::complex<real_t>* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "trsm", " for USM");
 }
 
-sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-                  oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, real_t alpha,
-                  const real_t* a, std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta,
-                  real_t* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                  oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n,
+                  std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, const real_t* b,
+                  std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc,
+                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "gemmt", " for USM");
 }
 
-sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-                  oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k,
-                  std::complex<real_t> alpha, const std::complex<real_t>* a, std::int64_t lda,
-                  const std::complex<real_t>* b, std::int64_t ldb, std::complex<real_t> beta,
-                  std::complex<real_t>* c, std::int64_t ldc,
+sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                  oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n,
+                  std::int64_t k, std::complex<real_t> alpha, const std::complex<real_t>* a,
+                  std::int64_t lda, const std::complex<real_t>* b, std::int64_t ldb,
+                  std::complex<real_t> beta, std::complex<real_t>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "gemmt", " for USM");
 }
diff --git a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp
index cb5bac88f..7721f58c9 100644
--- a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp
+++ b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp
@@ -23,28 +23,28 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 namespace column_major {
 
 // BUFFER
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-          sycl::buffer<oneapi::mkl::bfloat16, 1>& a, std::int64_t lda,
-          sycl::buffer<oneapi::mkl::bfloat16, 1>& b, std::int64_t ldb, float beta,
+          sycl::buffer<oneapi::math::bfloat16, 1>& a, std::int64_t lda,
+          sycl::buffer<oneapi::math::bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "gemm", " for bfloat16");
 }
 
 // USM
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                 const oneapi::mkl::bfloat16* a, std::int64_t lda, const oneapi::mkl::bfloat16* b,
+                 const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b,
                  std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "gemm", " for USM");
@@ -54,18 +54,18 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
 namespace row_major {
 
 // BUFFER
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-          sycl::buffer<oneapi::mkl::bfloat16, 1>& a, std::int64_t lda,
-          sycl::buffer<oneapi::mkl::bfloat16, 1>& b, std::int64_t ldb, float beta,
+          sycl::buffer<oneapi::math::bfloat16, 1>& a, std::int64_t lda,
+          sycl::buffer<oneapi::math::bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "gemm", " for bfloat16");
 }
 
 // USM
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
-                 const oneapi::mkl::bfloat16* a, std::int64_t lda, const oneapi::mkl::bfloat16* b,
+                 const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b,
                  std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
     throw unimplemented("blas", "gemm", " for USM");
@@ -74,5 +74,5 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level3_double.cpp b/src/blas/backends/portblas/portblas_level3_double.cpp
index 9f9d82d37..80f2d0bce 100644
--- a/src/blas/backends/portblas/portblas_level3_double.cpp
+++ b/src/blas/backends/portblas/portblas_level3_double.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -56,5 +56,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level3_float.cpp b/src/blas/backends/portblas/portblas_level3_float.cpp
index 53a5a1697..dea42ed9d 100644
--- a/src/blas/backends/portblas/portblas_level3_float.cpp
+++ b/src/blas/backends/portblas/portblas_level3_float.cpp
@@ -24,11 +24,11 @@
 #endif
 
 #include "portblas_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 
@@ -58,5 +58,5 @@ constexpr bool is_column_major() {
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_level3_half.cpp b/src/blas/backends/portblas/portblas_level3_half.cpp
index 136178998..dbd71ab4b 100644
--- a/src/blas/backends/portblas/portblas_level3_half.cpp
+++ b/src/blas/backends/portblas/portblas_level3_half.cpp
@@ -23,24 +23,24 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace portblas {
 namespace column_major {
 
 // BUFFER
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "gemm", " half");
 }
 
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -48,7 +48,7 @@ void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transp
 }
 
 // USM
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                  const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
@@ -56,7 +56,7 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
     throw unimplemented("blas", "gemm", " for USM");
 }
 
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -66,14 +66,14 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
 namespace row_major {
 
 // BUFFER
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
     throw unimplemented("blas", "gemm", " half");
 }
 
-void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -81,7 +81,7 @@ void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transp
 }
 
 // USM
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                  const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
@@ -89,7 +89,7 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
     throw unimplemented("blas", "gemm", " for USM");
 }
 
-sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
                  std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
                  std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -99,5 +99,5 @@ sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl:
 } // namespace row_major
 } // namespace portblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/portblas/portblas_wrappers.cpp b/src/blas/backends/portblas/portblas_wrappers.cpp
index 3f6170bb7..6777f358a 100644
--- a/src/blas/backends/portblas/portblas_wrappers.cpp
+++ b/src/blas/backends/portblas/portblas_wrappers.cpp
@@ -4,11 +4,11 @@
 
 #include "blas/function_table.hpp"
 
-#include "oneapi/mkl/blas/detail/portblas/onemkl_blas_portblas.hpp"
+#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT blas_function_table_t mkl_blas_table = {
+extern "C" ONEMATH_EXPORT blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
 #define BACKEND portblas
 #define MAJOR   column_major
diff --git a/src/blas/backends/rocblas/CMakeLists.txt b/src/blas/backends/rocblas/CMakeLists.txt
index 76dc126ad..c50e5349c 100644
--- a/src/blas/backends/rocblas/CMakeLists.txt
+++ b/src/blas/backends/rocblas/CMakeLists.txt
@@ -19,7 +19,7 @@
 #
 #=========================================================================
 
-set(LIB_NAME onemkl_blas_rocblas)
+set(LIB_NAME onemath_blas_rocblas)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(hip REQUIRED)
 find_package(rocblas REQUIRED)
@@ -30,37 +30,38 @@ set(SOURCES rocblas_level1.cpp
                 rocblas_level3.cpp 
                 rocblas_batch.cpp 
                 rocblas_extensions.cpp
-                $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:rocblas_scope_handle.cpp >
-                $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},hipsycl>:rocblas_scope_handle_hipsycl.cpp >
+                $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},dpc++>:rocblas_scope_handle.cpp >
+                $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},hipsycl>:rocblas_scope_handle_hipsycl.cpp >
                 $<$<BOOL:${BUILD_SHARED_LIBS}>: rocblas_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
-    target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
-    target_compile_options(ONEMKL::SYCL::SYCL INTERFACE
+if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+    target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
+    target_compile_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda
         -Xsycl-target-backend --offload-arch=${HIP_TARGETS})
-    target_link_options(ONEMKL::SYCL::SYCL INTERFACE
+    target_link_options(ONEMATH::SYCL::SYCL INTERFACE
         -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend 
         --offload-arch=${HIP_TARGETS})
 else()
-    target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
-    target_compile_options(ONEMKL::SYCL::SYCL INTERFACE)
-    target_link_options(ONEMKL::SYCL::SYCL INTERFACE)
+    target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
+    target_compile_options(ONEMATH::SYCL::SYCL INTERFACE)
+    target_link_options(ONEMATH::SYCL::SYCL INTERFACE)
 endif()
 
 target_link_libraries(${LIB_OBJ} PRIVATE roc::rocblas hip::host Threads::Threads)
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_17)
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON)
@@ -78,8 +79,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/blas/backends/rocblas/rocblas_batch.cpp b/src/blas/backends/rocblas/rocblas_batch.cpp
index ef614c02b..b6e550724 100644
--- a/src/blas/backends/rocblas/rocblas_batch.cpp
+++ b/src/blas/backends/rocblas/rocblas_batch.cpp
@@ -22,8 +22,8 @@
 #include "rocblas_helper.hpp"
 #include "rocblas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 // Helper Functions
 
@@ -64,7 +64,7 @@ static inline void conj_vector(sycl::handler& cgh, T** ptr, const int64_t len, c
 }
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 namespace column_major {
@@ -81,7 +81,7 @@ inline void copy_batch(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T,
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = sc.get_mem<rocDataType*>(x_acc);
@@ -117,7 +117,7 @@ inline void axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = sc.get_mem<rocDataType*>(x_acc);
@@ -156,7 +156,7 @@ inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -198,7 +198,7 @@ inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -245,7 +245,7 @@ inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose tran
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocTypeA*>(a_acc);
@@ -314,7 +314,7 @@ inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo uppe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -355,7 +355,7 @@ inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpos
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -392,13 +392,13 @@ inline void omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64
     overflow_check(m, n, lda, ldb, stridea, strideb, batch_size);
 
     const T beta = 0;
-    const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n;
-    const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? n : m;
+    const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n;
+    const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m;
 
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -465,7 +465,7 @@ inline void omatadd_batch(Func func, sycl::queue& queue, transpose transa, trans
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<const rocDataType*>(a_acc);
@@ -510,7 +510,7 @@ inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t* n, const T
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             int64_t offset = 0;
@@ -552,7 +552,7 @@ inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t n, const T*
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -592,7 +592,7 @@ inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t* n, T* alph
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             int64_t offset = 0;
@@ -635,7 +635,7 @@ inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha,
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -675,7 +675,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, in
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -720,7 +720,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, i
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             int64_t offset = 0;
@@ -767,7 +767,7 @@ inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, in
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -810,7 +810,7 @@ inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, i
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -864,7 +864,7 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose tra
     rocblas_gemm_flags flags = rocblas_gemm_flags_none;
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocTypeA*>(a);
@@ -942,7 +942,7 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, tr
     rocblas_gemm_flags flags = rocblas_gemm_flags_none;
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             int64_t offset = 0;
@@ -1017,7 +1017,7 @@ inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, up
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1061,7 +1061,7 @@ inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, u
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -1112,7 +1112,7 @@ inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower,
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -1159,7 +1159,7 @@ inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1201,12 +1201,12 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose trans
     overflow_check(m, n, lda, ldb, stridea, strideb, batch_size);
 
     const T beta = 0;
-    const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n;
-    const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? n : m;
+    const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n;
+    const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m;
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1275,7 +1275,7 @@ inline sycl::event omatadd_batch(Func func, sycl::queue& queue, transpose transa
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1321,7 +1321,7 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* tran
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -1331,8 +1331,8 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* tran
                 auto** b_ = reinterpret_cast<rocDataType**>(b);
 
                 const T beta = 0;
-                const auto new_m = trans[i] == oneapi::mkl::transpose::nontrans ? m[i] : n[i];
-                const auto new_n = trans[i] == oneapi::mkl::transpose::nontrans ? n[i] : m[i];
+                const auto new_m = trans[i] == oneapi::math::transpose::nontrans ? m[i] : n[i];
+                const auto new_n = trans[i] == oneapi::math::transpose::nontrans ? n[i] : m[i];
 
                 rocblas_native_func(func, err, handle, get_rocblas_operation(trans[i]),
                                     get_rocblas_operation(trans[i]), (int)new_m, (int)new_n,
@@ -1446,10 +1446,10 @@ inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m
                        int64_t stridea, sycl::buffer<std::complex<T>, 1>& x, int64_t incx,
                        int64_t stridex, std::complex<T> beta, sycl::buffer<std::complex<T>, 1>& y,
                        int64_t incy, int64_t stridey, int64_t batch_size) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -1467,7 +1467,7 @@ inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m
     column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx, stridex,
                              beta, y, incy, stridey, batch_size);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit(
                 [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); });
@@ -1480,8 +1480,8 @@ inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m
                        T alpha, sycl::buffer<T, 1>& a, int64_t lda, int64_t stridea,
                        sycl::buffer<T, 1>& x, int64_t incx, int64_t stridex, T beta,
                        sycl::buffer<T, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx, stridex,
                              beta, y, incy, stridey, batch_size);
@@ -1508,8 +1508,8 @@ inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m
                        sycl::buffer<T, 1>& a, int64_t lda, int64_t stridea, sycl::buffer<T, 1>& x,
                        int64_t incx, int64_t stridex, sycl::buffer<T, 1>& c, int64_t ldc,
                        int64_t stridec, int64_t batch_size) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
 
     column_major::dgmm_batch(func, queue, new_side, n, m, a, lda, stridea, x, incx, stridex, c, ldc,
                              stridec, batch_size);
@@ -1588,10 +1588,10 @@ inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo uppe
                        transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha,
                        sycl::buffer<T, 1>& a, int64_t lda, int64_t stridea, sycl::buffer<T, 1>& b,
                        int64_t ldb, int64_t strideb, int64_t batch_size) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::trsm_batch(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda,
                              stridea, b, ldb, strideb, batch_size);
@@ -1618,10 +1618,10 @@ inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpos
                        int64_t k, T alpha, sycl::buffer<T, 1>& a, int64_t lda, int64_t stridea,
                        T beta, sycl::buffer<T, 1>& c, int64_t ldc, int64_t stridec,
                        int64_t batch_size) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::syrk_batch(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, stridea, beta,
                              c, ldc, stridec, batch_size);
@@ -1821,10 +1821,10 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, in
                               const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -1845,7 +1845,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, in
     done = column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx,
                                     stridex, beta, y, incy, stridey, batch_size, dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -1863,8 +1863,8 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, in
                               int64_t incx, int64_t stridex, T beta, T* y, int64_t incy,
                               int64_t stridey, int64_t batch_size,
                               const std::vector<sycl::event>& dependencies) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx,
                                     stridex, beta, y, incy, stridey, batch_size, dependencies);
@@ -1898,7 +1898,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, i
 
     int64_t stride = 0;
     for (int64_t i = 0; i < group_count; i++) {
-        if (trans[i] == oneapi::mkl::transpose::conjtrans) {
+        if (trans[i] == oneapi::math::transpose::conjtrans) {
             alpha[i] = std::conj(alpha[i]);
             beta[i] = std::conj(beta[i]);
 
@@ -1921,9 +1921,9 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, i
 
     auto tmp_trans = std::vector<transpose>{ (std::size_t)group_count };
     for (int64_t i = 0; i < group_count; i++) {
-        const auto new_trans = trans[i] == oneapi::mkl::transpose::nontrans
-                                   ? oneapi::mkl::transpose::trans
-                                   : oneapi::mkl::transpose::nontrans;
+        const auto new_trans = trans[i] == oneapi::math::transpose::nontrans
+                                   ? oneapi::math::transpose::trans
+                                   : oneapi::math::transpose::nontrans;
         tmp_trans[i] = trans[i];
         trans[i] = new_trans;
     }
@@ -1936,7 +1936,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, i
 
     stride = 0;
     for (int64_t i = 0; i < group_count; i++) {
-        if (trans[i] == oneapi::mkl::transpose::conjtrans) {
+        if (trans[i] == oneapi::math::transpose::conjtrans) {
             if (n[i] > 0) {
                 done = queue.submit([&](sycl::handler& cgh) {
                     conj_vector(cgh, y, n[i], incy[i], stride, group_size[i]);
@@ -1957,9 +1957,9 @@ inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, i
     auto tmp_trans = std::vector<transpose>{ static_cast<std::size_t>(group_count) };
 
     for (int64_t i = 0; i < group_count; i++) {
-        const auto new_trans = trans[i] == oneapi::mkl::transpose::nontrans
-                                   ? oneapi::mkl::transpose::trans
-                                   : oneapi::mkl::transpose::nontrans;
+        const auto new_trans = trans[i] == oneapi::math::transpose::nontrans
+                                   ? oneapi::math::transpose::trans
+                                   : oneapi::math::transpose::nontrans;
         tmp_trans[i] = trans[i];
         trans[i] = new_trans;
     }
@@ -1994,8 +1994,8 @@ inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, in
                               const T* a, int64_t lda, int64_t stridea, const T* x, int64_t incx,
                               int64_t stridex, T* c, int64_t ldc, int64_t stridec,
                               int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
 
     return column_major::dgmm_batch(func, queue, new_side, n, m, a, lda, stridea, x, incx, stridex,
                                     c, ldc, stridec, batch_size, dependencies);
@@ -2023,8 +2023,8 @@ inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, i
                               T** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                               const std::vector<sycl::event>& dependencies) {
     for (int64_t i = 0; i < group_count; i++) {
-        const auto new_side = left_right[i] == oneapi::mkl::side::left ? oneapi::mkl::side::right
-                                                                       : oneapi::mkl::side::left;
+        const auto new_side = left_right[i] == oneapi::math::side::left ? oneapi::math::side::right
+                                                                        : oneapi::math::side::left;
         left_right[i] = new_side;
     }
 
@@ -2161,10 +2161,10 @@ inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, up
                               const T* a, int64_t lda, int64_t stridea, T* b, int64_t ldb,
                               int64_t strideb, int64_t batch_size,
                               const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::trsm_batch(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha,
                                     a, lda, stridea, b, ldb, strideb, batch_size, dependencies);
@@ -2192,12 +2192,13 @@ inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, u
                               const T** a, int64_t* lda, T** b, int64_t* ldb, int64_t group_count,
                               int64_t* group_size, const std::vector<sycl::event>& dependencies) {
     for (int64_t i = 0; i < group_count; i++) {
-        const auto new_side = left_right[i] == oneapi::mkl::side::left ? oneapi::mkl::side::right
-                                                                       : oneapi::mkl::side::left;
+        const auto new_side = left_right[i] == oneapi::math::side::left ? oneapi::math::side::right
+                                                                        : oneapi::math::side::left;
         left_right[i] = new_side;
 
-        const auto new_uplo = upper_lower[i] == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                                         : oneapi::mkl::uplo::lower;
+        const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower
+                                  ? oneapi::math::uplo::upper
+                                  : oneapi::math::uplo::lower;
         upper_lower[i] = new_uplo;
     }
 
@@ -2228,13 +2229,14 @@ inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower,
                               T** c, int64_t* ldc, int64_t group_count, int64_t* group_size,
                               const std::vector<sycl::event>& dependencies) {
     for (int64_t i = 0; i < group_count; i++) {
-        const auto new_uplo = upper_lower[i] == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                                         : oneapi::mkl::uplo::lower;
+        const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower
+                                  ? oneapi::math::uplo::upper
+                                  : oneapi::math::uplo::lower;
         upper_lower[i] = new_uplo;
 
-        const auto new_trans = trans[i] == oneapi::mkl::transpose::nontrans
-                                   ? oneapi::mkl::transpose::trans
-                                   : oneapi::mkl::transpose::nontrans;
+        const auto new_trans = trans[i] == oneapi::math::transpose::nontrans
+                                   ? oneapi::math::transpose::trans
+                                   : oneapi::math::transpose::nontrans;
         trans[i] = new_trans;
     }
 
@@ -2263,10 +2265,10 @@ inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, t
                               int64_t n, int64_t k, const T alpha, const T* a, int64_t lda,
                               int64_t stridea, const T beta, T* c, int64_t ldc, int64_t stridec,
                               int64_t batch_size, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::syrk_batch(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, stridea,
                                     beta, c, ldc, stridec, batch_size, dependencies);
@@ -2425,5 +2427,5 @@ sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int
 } // namespace row_major
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_extensions.cpp b/src/blas/backends/rocblas/rocblas_extensions.cpp
index 5fa5b61aa..b649ed1ef 100644
--- a/src/blas/backends/rocblas/rocblas_extensions.cpp
+++ b/src/blas/backends/rocblas/rocblas_extensions.cpp
@@ -22,11 +22,11 @@
 #include "rocblas_helper.hpp"
 #include "rocblas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 namespace column_major {
@@ -97,13 +97,13 @@ inline void omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m,
     overflow_check(m, n, lda, ldb);
 
     const T beta = 0;
-    const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n;
-    const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? n : m;
+    const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n;
+    const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m;
 
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -183,7 +183,7 @@ inline void omatadd(Func func, sycl::queue& queue, transpose transa, transpose t
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -284,12 +284,12 @@ inline sycl::event omatcopy(Func func, sycl::queue& queue, transpose trans, int6
     overflow_check(m, n, lda, ldb);
 
     const T beta = 0;
-    const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n;
-    const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? n : m;
+    const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n;
+    const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m;
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -375,7 +375,7 @@ inline sycl::event omatadd(Func func, sycl::queue& queue, transpose transa, tran
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -712,5 +712,5 @@ OMATADD_LAUNCHER_USM(std::complex<double>, rocblas_zgeam)
 } // namespace row_major
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_handle.hpp b/src/blas/backends/rocblas/rocblas_handle.hpp
index cde400bfb..660be6371 100644
--- a/src/blas/backends/rocblas/rocblas_handle.hpp
+++ b/src/blas/backends/rocblas/rocblas_handle.hpp
@@ -24,7 +24,7 @@
 #include "rocblas_helper.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -57,7 +57,7 @@ struct rocblas_handle_ {
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // _ROCBLAS_HANDLE_HPP_
diff --git a/src/blas/backends/rocblas/rocblas_helper.hpp b/src/blas/backends/rocblas/rocblas_helper.hpp
index e28139ef3..5f9b03276 100644
--- a/src/blas/backends/rocblas/rocblas_helper.hpp
+++ b/src/blas/backends/rocblas/rocblas_helper.hpp
@@ -29,17 +29,17 @@
 
 #include <rocblas/rocblas.h>
 #include <complex>
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include <hip/hip_runtime.h>
 #include "dtype_string.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
 // The static assert to make sure that all index types used in
-// src/oneMKL/backend/rocblas/blas.hpp interface are int64_t
+// src/oneMath/backend/rocblas/blas.hpp interface are int64_t
 template <typename... Next>
 struct is_int64 : std::false_type {};
 
@@ -71,7 +71,7 @@ struct Overflow<Index, T...> {
 
 template <typename Index, typename... Next>
 void overflow_check(Index index, Next... indices) {
-    static_assert(is_int64<Index, Next...>::value, "oneMKL index type must be 64 bit integer.");
+    static_assert(is_int64<Index, Next...>::value, "oneMath index type must be 64 bit integer.");
     Overflow<Index, Next...>::check(index, indices...);
 }
 
@@ -183,35 +183,35 @@ inline void rocblas_native_func(Func func, rocblas_status err, rocblas_handle ha
 #endif
 };
 
-inline rocblas_operation get_rocblas_operation(oneapi::mkl::transpose trn) {
+inline rocblas_operation get_rocblas_operation(oneapi::math::transpose trn) {
     switch (trn) {
-        case oneapi::mkl::transpose::nontrans: return rocblas_operation_none;
-        case oneapi::mkl::transpose::trans: return rocblas_operation_transpose;
-        case oneapi::mkl::transpose::conjtrans: return rocblas_operation_conjugate_transpose;
+        case oneapi::math::transpose::nontrans: return rocblas_operation_none;
+        case oneapi::math::transpose::trans: return rocblas_operation_transpose;
+        case oneapi::math::transpose::conjtrans: return rocblas_operation_conjugate_transpose;
         default: throw "Wrong transpose Operation.";
     }
 }
 
-inline rocblas_fill get_rocblas_fill_mode(oneapi::mkl::uplo ul) {
+inline rocblas_fill get_rocblas_fill_mode(oneapi::math::uplo ul) {
     switch (ul) {
-        case oneapi::mkl::uplo::upper: return rocblas_fill_upper;
-        case oneapi::mkl::uplo::lower: return rocblas_fill_lower;
+        case oneapi::math::uplo::upper: return rocblas_fill_upper;
+        case oneapi::math::uplo::lower: return rocblas_fill_lower;
         default: throw "Wrong fill mode.";
     }
 }
 
-inline rocblas_diagonal get_rocblas_diag_type(oneapi::mkl::diag un) {
+inline rocblas_diagonal get_rocblas_diag_type(oneapi::math::diag un) {
     switch (un) {
-        case oneapi::mkl::diag::unit: return rocblas_diagonal_unit;
-        case oneapi::mkl::diag::nonunit: return rocblas_diagonal_non_unit;
+        case oneapi::math::diag::unit: return rocblas_diagonal_unit;
+        case oneapi::math::diag::nonunit: return rocblas_diagonal_non_unit;
         default: throw "Wrong diag type.";
     }
 }
 
-inline rocblas_side get_rocblas_side_mode(oneapi::mkl::side lr) {
+inline rocblas_side get_rocblas_side_mode(oneapi::math::side lr) {
     switch (lr) {
-        case oneapi::mkl::side::left: return rocblas_side_left;
-        case oneapi::mkl::side::right: return rocblas_side_right;
+        case oneapi::math::side::left: return rocblas_side_left;
+        case oneapi::math::side::right: return rocblas_side_right;
         default: throw "Wrong side mode.";
     }
 }
@@ -298,6 +298,6 @@ struct RocEquivalentType<sycl::half> {
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif // _ROCBLAS_HELPER_HPP_
diff --git a/src/blas/backends/rocblas/rocblas_level1.cpp b/src/blas/backends/rocblas/rocblas_level1.cpp
index 0eb5489df..31c96ea82 100644
--- a/src/blas/backends/rocblas/rocblas_level1.cpp
+++ b/src/blas/backends/rocblas/rocblas_level1.cpp
@@ -22,11 +22,11 @@
 #include "rocblas_helper.hpp"
 #include "rocblas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 namespace column_major {
@@ -43,7 +43,7 @@ inline void asum(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T1, 1>&
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the rocblas_pointer_mode_host
             // when the data is on buffer, it must be set to
@@ -86,7 +86,7 @@ inline void scal(Func func, sycl::queue& queue, int64_t n, T1 a, sycl::buffer<T2
 
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto x_ = sc.get_mem<rocDataType2*>(x_acc);
             rocblas_status err;
@@ -119,7 +119,7 @@ inline void axpy(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = sc.get_mem<rocDataType*>(x_acc);
@@ -176,7 +176,7 @@ inline void rotg(Func func, sycl::queue& queue, sycl::buffer<T1, 1>& a, sycl::bu
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto s_acc = s.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the rocblas_pointer_mode_host
             // when the data is on buffer, it must be set to
@@ -221,7 +221,7 @@ inline void rotm(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>& x
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
         auto param_acc = param.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -263,7 +263,7 @@ inline void copy(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>& x
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = sc.get_mem<rocDataType*>(x_acc);
@@ -297,7 +297,7 @@ inline void dot(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>& x,
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -350,7 +350,7 @@ inline void rot(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T1, 1>& x
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             // By default the pointer mode is the rocblas_pointer_mode_host
             // when the data is on buffer, it must be set to
@@ -389,7 +389,7 @@ void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer<float, 1>& x,
         auto x_acc = x.get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -427,7 +427,7 @@ inline void rotmg(Func func, sycl::queue& queue, sycl::buffer<T, 1>& d1, sycl::b
         auto x1_acc = x1.template get_access<sycl::access::mode::read_write>(cgh);
         auto y1_acc = y1_buff.template get_access<sycl::access::mode::read>(cgh);
         auto param_acc = param.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -479,7 +479,7 @@ inline void iamax(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>&
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto int_res_acc = int_res_buff.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -531,7 +531,7 @@ inline void swap(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>& x
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = sc.get_mem<rocDataType*>(x_acc);
@@ -572,7 +572,7 @@ inline void iamin(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T, 1>&
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto int_res_acc = int_res_buff.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -625,7 +625,7 @@ inline void nrm2(Func func, sycl::queue& queue, int64_t n, sycl::buffer<T1, 1>&
     queue.submit([&](sycl::handler& cgh) {
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto res_acc = result.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             // By default the pointer mode is the rocblas_pointer_mode_host
@@ -671,7 +671,7 @@ inline sycl::event asum(Func func, sycl::queue& queue, int64_t n, const T1* x, c
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device);
 
@@ -709,7 +709,7 @@ inline sycl::event scal(Func func, sycl::queue& queue, int64_t n, T1 a, T2* x, i
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<rocDataType2*>(x);
@@ -745,7 +745,7 @@ inline sycl::event axpy(Func func, sycl::queue& queue, int64_t n, T alpha, const
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -802,7 +802,7 @@ inline sycl::event rotg(Func func, sycl::queue& queue, T1* a, T1* b, T2* c, T1*
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType1*>(a);
@@ -838,7 +838,7 @@ inline sycl::event rotm(Func func, sycl::queue& queue, int64_t n, T* x, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<rocDataType*>(x);
@@ -871,7 +871,7 @@ inline sycl::event copy(Func func, sycl::queue& queue, int64_t n, const T* x, in
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -906,7 +906,7 @@ inline sycl::event dot(Func func, sycl::queue& queue, int64_t n, const T* x, con
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -951,7 +951,7 @@ inline sycl::event rot(Func func, sycl::queue& queue, int64_t n, T1* x, const in
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<rocDataType1*>(x);
@@ -987,7 +987,7 @@ sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int6
     // rocBLAS does not support sdot so we need to mimic sdot.
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<const float*>(x);
@@ -1010,7 +1010,7 @@ inline sycl::event rotmg(Func func, sycl::queue& queue, T* d1, T* d2, T* x1, T y
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto d1_ = reinterpret_cast<rocDataType*>(d1);
@@ -1053,7 +1053,7 @@ inline sycl::event iamax(Func func, sycl::queue& queue, int64_t n, const T* x, c
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device);
             auto x_ = reinterpret_cast<const rocDataType*>(x);
@@ -1092,7 +1092,7 @@ inline sycl::event swap(Func func, sycl::queue& queue, int64_t n, T* x, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto x_ = reinterpret_cast<rocDataType*>(x);
@@ -1134,7 +1134,7 @@ inline sycl::event iamin(Func func, sycl::queue& queue, int64_t n, const T* x, c
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device);
 
@@ -1175,7 +1175,7 @@ inline sycl::event nrm2(Func func, sycl::queue& queue, int64_t n, const T1* x, c
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device);
 
@@ -1776,5 +1776,5 @@ NRM2_LAUNCHER_USM(std::complex<double>, double, rocblas_dznrm2)
 } // namespace row_major
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_level2.cpp b/src/blas/backends/rocblas/rocblas_level2.cpp
index e0cc63183..20c08c5ce 100644
--- a/src/blas/backends/rocblas/rocblas_level2.cpp
+++ b/src/blas/backends/rocblas/rocblas_level2.cpp
@@ -22,8 +22,8 @@
 #include "rocblas_helper.hpp"
 #include "rocblas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 // Helper Functions
 
@@ -74,7 +74,7 @@ static inline void conj_vector(sycl::handler& cgh, T* ptr_a, T* ptr_b, const int
 }
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 namespace column_major {
@@ -92,7 +92,7 @@ inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -131,7 +131,7 @@ inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -170,7 +170,7 @@ inline void ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, sy
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -210,7 +210,7 @@ inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -247,7 +247,7 @@ inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -284,7 +284,7 @@ inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, Scal
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -319,7 +319,7 @@ inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -355,7 +355,7 @@ inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -390,7 +390,7 @@ inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, Scal
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -424,7 +424,7 @@ inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -460,7 +460,7 @@ inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -497,7 +497,7 @@ inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -532,7 +532,7 @@ inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T al
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -569,7 +569,7 @@ inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -608,7 +608,7 @@ inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -642,7 +642,7 @@ inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T al
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -676,7 +676,7 @@ inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T a
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read>(cgh);
         auto y_acc = y.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -711,7 +711,7 @@ inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -748,7 +748,7 @@ inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -784,7 +784,7 @@ inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -819,7 +819,7 @@ inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -855,7 +855,7 @@ inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -891,7 +891,7 @@ inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto x_acc = x.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -928,7 +928,7 @@ inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -969,7 +969,7 @@ inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1010,7 +1010,7 @@ inline sycl::event ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T al
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1050,7 +1050,7 @@ inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1088,7 +1088,7 @@ inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1127,7 +1127,7 @@ inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1162,7 +1162,7 @@ inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1199,7 +1199,7 @@ inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1237,7 +1237,7 @@ inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1272,7 +1272,7 @@ inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1309,7 +1309,7 @@ inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1347,7 +1347,7 @@ inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1385,7 +1385,7 @@ inline sycl::event syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1423,7 +1423,7 @@ inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1463,7 +1463,7 @@ inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1499,7 +1499,7 @@ inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t
     overflow_check(n, incx);
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1533,7 +1533,7 @@ inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<rocDataType*>(a);
@@ -1570,7 +1570,7 @@ inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1609,7 +1609,7 @@ inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1648,7 +1648,7 @@ inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1687,7 +1687,7 @@ inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1726,7 +1726,7 @@ inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1765,7 +1765,7 @@ inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -1806,10 +1806,10 @@ inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
                  std::complex<T> alpha, sycl::buffer<std::complex<T>, 1>& a, int64_t lda,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx, std::complex<T> beta,
                  sycl::buffer<std::complex<T>, 1>& y, int64_t incy) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -1824,7 +1824,7 @@ inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
 
     column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); });
         }
@@ -1835,8 +1835,8 @@ template <typename Func, typename T>
 inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, T alpha,
                  sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy);
 }
@@ -1860,10 +1860,10 @@ inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
                  int64_t ku, std::complex<T> alpha, sycl::buffer<std::complex<T>, 1>& a,
                  int64_t lda, sycl::buffer<std::complex<T>, 1>& x, int64_t incx,
                  std::complex<T> beta, sycl::buffer<std::complex<T>, 1>& y, int64_t incy) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -1878,7 +1878,7 @@ inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int6
 
     column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y, incy);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); });
         }
@@ -1889,8 +1889,8 @@ template <typename Func, typename T>
 inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl,
                  int64_t ku, T alpha, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x,
                  int64_t incx, T beta, sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y, incy);
 }
@@ -1956,8 +1956,8 @@ template <typename Func, typename T>
 inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha,
                  sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -1988,8 +1988,8 @@ template <typename Func, typename T>
 inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -2020,8 +2020,8 @@ template <typename Func, typename ScalarType, typename DataType>
 inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha,
                 sycl::buffer<DataType, 1>& x, int64_t incx, sycl::buffer<DataType, 1>& a,
                 int64_t lda) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
@@ -2046,8 +2046,8 @@ template <typename Func, typename T>
 inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& y, int64_t incy,
                  sycl::buffer<T, 1>& a, int64_t lda) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); });
@@ -2072,8 +2072,8 @@ template <typename Func, typename T>
 inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& a, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -2103,8 +2103,8 @@ HPMV_LAUNCHER(std::complex<double>, rocblas_zhpmv)
 template <typename Func, typename ScalarType, typename DataType>
 inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha,
                 sycl::buffer<DataType, 1>& x, int64_t incx, sycl::buffer<DataType, 1>& a) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
@@ -2128,8 +2128,8 @@ template <typename Func, typename T>
 inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& y, int64_t incy,
                  sycl::buffer<T, 1>& a) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); });
@@ -2154,8 +2154,8 @@ template <typename Func, typename T>
 inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha,
                  sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::sbmv(func, queue, new_uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
 }
@@ -2176,8 +2176,8 @@ template <typename Func, typename T>
 inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::symv(func, queue, new_uplo, n, alpha, a, lda, x, incx, beta, y, incy);
 }
@@ -2197,8 +2197,8 @@ SYMV_LAUNCHER(double, rocblas_dsymv)
 template <typename Func, typename T>
 inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                 sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& a, int64_t lda) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::syr(func, queue, new_uplo, n, alpha, x, incx, a, lda);
 }
@@ -2221,8 +2221,8 @@ template <typename Func, typename T>
 inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& y, int64_t incy,
                  sycl::buffer<T, 1>& a, int64_t lda) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::syr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, lda);
 }
@@ -2246,8 +2246,8 @@ template <typename Func, typename T>
 inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& a, sycl::buffer<T, 1>& x, int64_t incx, T beta,
                  sycl::buffer<T, 1>& y, int64_t incy) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::spmv(func, queue, new_uplo, n, alpha, a, x, incx, beta, y, incy);
 }
@@ -2267,8 +2267,8 @@ SPMV_LAUNCHER(double, rocblas_dspmv)
 template <typename Func, typename T>
 inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                 sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& a) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::spr(func, queue, new_uplo, n, alpha, x, incx, a);
 }
@@ -2288,8 +2288,8 @@ template <typename Func, typename T>
 inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                  sycl::buffer<T, 1>& x, int64_t incx, sycl::buffer<T, 1>& y, int64_t incy,
                  sycl::buffer<T, 1>& a) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::spr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a);
 }
@@ -2310,12 +2310,12 @@ template <typename Func, typename T>
 inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, int64_t k, sycl::buffer<std::complex<T>, 1>& a, int64_t lda,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2323,7 +2323,7 @@ inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2334,10 +2334,10 @@ template <typename Func, typename T>
 inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, int64_t k, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x,
                  int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx);
 }
@@ -2360,12 +2360,12 @@ template <typename Func, typename T>
 inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, int64_t k, sycl::buffer<std::complex<T>, 1>& a, int64_t lda,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2373,7 +2373,7 @@ inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2384,10 +2384,10 @@ template <typename Func, typename T>
 inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, int64_t k, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x,
                  int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx);
 }
@@ -2410,12 +2410,12 @@ template <typename Func, typename T>
 inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<std::complex<T>, 1>& a,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2423,7 +2423,7 @@ inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2433,10 +2433,10 @@ inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 template <typename Func, typename T>
 inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<T, 1>& a, sycl::buffer<T, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx);
 }
@@ -2458,12 +2458,12 @@ template <typename Func, typename T>
 inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<std::complex<T>, 1>& a,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2471,7 +2471,7 @@ inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2481,10 +2481,10 @@ inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 template <typename Func, typename T>
 inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<T, 1>& a, sycl::buffer<T, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx);
 }
@@ -2506,12 +2506,12 @@ template <typename Func, typename T>
 inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<std::complex<T>, 1>& a, int64_t lda,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2519,7 +2519,7 @@ inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2530,10 +2530,10 @@ template <typename Func, typename T>
 inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x,
                  int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx);
 }
@@ -2555,12 +2555,12 @@ template <typename Func, typename T>
 inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<std::complex<T>, 1>& a, int64_t lda,
                  sycl::buffer<std::complex<T>, 1>& x, int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2568,7 +2568,7 @@ inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
 
     column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); });
         }
@@ -2579,10 +2579,10 @@ template <typename Func, typename T>
 inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag,
                  int64_t n, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& x,
                  int64_t incx) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx);
 }
@@ -2610,10 +2610,10 @@ inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -2632,7 +2632,7 @@ inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t
     done = column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -2648,8 +2648,8 @@ template <typename Func, typename T>
 inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n,
                         T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y,
                         int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy,
                               dependencies);
@@ -2678,10 +2678,10 @@ inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         alpha = std::conj(alpha);
         beta = std::conj(beta);
 
@@ -2700,7 +2700,7 @@ inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t
     done = column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y,
                               incy, dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -2717,8 +2717,8 @@ inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t
                         int64_t kl, int64_t ku, T alpha, const T* a, int64_t lda, const T* x,
                         int64_t incx, T beta, T* y, int64_t incy,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y,
                               incy, dependencies);
@@ -2791,8 +2791,8 @@ inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
                         int64_t incy, const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -2833,8 +2833,8 @@ inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
                         int64_t incy, const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -2873,8 +2873,8 @@ template <typename Func, typename ScalarType, typename DataType>
 inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n,
                        const ScalarType alpha, const DataType* x, int64_t incx, DataType* a,
                        int64_t lda, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); })
@@ -2900,8 +2900,8 @@ template <typename Func, typename T>
 inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); })
@@ -2931,8 +2931,8 @@ inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
     auto new_alpha = std::conj(alpha);
     auto new_beta = std::conj(beta);
 
@@ -2971,8 +2971,8 @@ template <typename Func, typename ScalarType, typename DataType>
 inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n,
                        const ScalarType alpha, const DataType* x, int64_t incx, DataType* a,
                        const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); })
@@ -2998,8 +2998,8 @@ template <typename Func, typename T>
 inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* x, int64_t incx, const T* y, int64_t incy, T* a,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     if (n > 0) {
         queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); })
@@ -3026,8 +3026,8 @@ template <typename Func, typename T>
 inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k,
                         T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y,
                         int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::sbmv(func, queue, new_uplo, n, k, alpha, a, lda, x, incx, beta, y, incy,
                               dependencies);
@@ -3050,8 +3050,8 @@ template <typename Func, typename T>
 inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y,
                         int64_t incy, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::symv(func, queue, new_uplo, n, alpha, a, lda, x, incx, beta, y, incy,
                               dependencies);
@@ -3074,8 +3074,8 @@ template <typename Func, typename T>
 inline sycl::event syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                        const T* x, int64_t incx, T* a, int64_t lda,
                        const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::syr(func, queue, new_uplo, n, alpha, x, incx, a, lda, dependencies);
 }
@@ -3099,8 +3099,8 @@ template <typename Func, typename T>
 inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::syr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, lda,
                               dependencies);
@@ -3126,8 +3126,8 @@ template <typename Func, typename T>
 inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::spmv(func, queue, new_uplo, n, alpha, a, x, incx, beta, y, incy,
                               dependencies);
@@ -3150,8 +3150,8 @@ template <typename Func, typename T>
 inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                        const T* x, int64_t incx, T* a,
                        const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::spr(func, queue, new_uplo, n, alpha, x, incx, a, dependencies);
 }
@@ -3171,8 +3171,8 @@ template <typename Func, typename T>
 inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha,
                         const T* x, int64_t incx, const T* y, int64_t incy, T* a,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::spr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, dependencies);
 }
@@ -3197,12 +3197,12 @@ inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3212,7 +3212,7 @@ inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -3228,10 +3228,10 @@ template <typename Func, typename T>
 inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x,
                         int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx,
                               dependencies);
@@ -3259,12 +3259,12 @@ inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3274,7 +3274,7 @@ inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -3290,10 +3290,10 @@ template <typename Func, typename T>
 inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x,
                         int64_t incx, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx,
                               dependencies);
@@ -3320,12 +3320,12 @@ inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         int64_t incx, const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3335,7 +3335,7 @@ inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             incx = std::abs(incx);
 
@@ -3353,10 +3353,10 @@ template <typename Func, typename T>
 inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, const T* a, T* x, int64_t incx,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx,
                               dependencies);
@@ -3383,12 +3383,12 @@ inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         int64_t incx, const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3398,7 +3398,7 @@ inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             incx = std::abs(incx);
 
@@ -3416,10 +3416,10 @@ template <typename Func, typename T>
 inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, const T* a, T* x, int64_t incx,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx,
                               dependencies);
@@ -3447,12 +3447,12 @@ inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3462,7 +3462,7 @@ inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -3478,10 +3478,10 @@ template <typename Func, typename T>
 inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx,
                               dependencies);
@@ -3509,12 +3509,12 @@ inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         const std::vector<sycl::event>& dependencies) {
     sycl::event done;
 
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); })
                 .wait_and_throw();
@@ -3524,7 +3524,7 @@ inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpo
     done = column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx,
                               dependencies);
 
-    if (trans == oneapi::mkl::transpose::conjtrans) {
+    if (trans == oneapi::math::transpose::conjtrans) {
         if (n > 0) {
             done = queue.submit([&](sycl::handler& cgh) {
                 cgh.depends_on(done);
@@ -3540,10 +3540,10 @@ template <typename Func, typename T>
 inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans,
                         diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx,
                               dependencies);
@@ -3567,5 +3567,5 @@ TRSV_LAUNCHER_USM(std::complex<double>, rocblas_ztrsv)
 } // namespace row_major
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_level3.cpp b/src/blas/backends/rocblas/rocblas_level3.cpp
index fe0dc4090..1ec25bb29 100644
--- a/src/blas/backends/rocblas/rocblas_level3.cpp
+++ b/src/blas/backends/rocblas/rocblas_level3.cpp
@@ -22,11 +22,11 @@
 #include "rocblas_helper.hpp"
 #include "rocblas_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 namespace column_major {
@@ -44,7 +44,7 @@ inline void gemm(Func func, sycl::queue& queue, transpose transa, transpose tran
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -91,7 +91,7 @@ inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType_A*>(a_acc);
@@ -140,7 +140,7 @@ inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lowe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -180,7 +180,7 @@ inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lowe
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -217,7 +217,7 @@ inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -255,7 +255,7 @@ inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose tran
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -291,7 +291,7 @@ inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose tra
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -333,7 +333,7 @@ inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose tra
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -375,7 +375,7 @@ inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lowe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -421,7 +421,7 @@ inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lowe
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = sc.get_mem<rocDataType*>(a_acc);
@@ -462,7 +462,7 @@ inline sycl::event gemm(Func func, sycl::queue& queue, transpose transa, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -509,7 +509,7 @@ inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType_A*>(a);
@@ -559,7 +559,7 @@ inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -600,7 +600,7 @@ inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -639,7 +639,7 @@ inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -680,7 +680,7 @@ inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpo
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -719,7 +719,7 @@ inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -762,7 +762,7 @@ inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -806,7 +806,7 @@ inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -854,7 +854,7 @@ inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upp
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
+        onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
 
             auto a_ = reinterpret_cast<const rocDataType*>(a);
@@ -959,10 +959,10 @@ template <typename Func, typename T>
 inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m,
                  int64_t n, T alpha, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& b,
                  int64_t ldb, T beta, sycl::buffer<T, 1>& c, int64_t ldc) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::symm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -986,10 +986,10 @@ template <typename Func, typename T>
 inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m,
                  int64_t n, T alpha, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& b,
                  int64_t ldb, T beta, sycl::buffer<T, 1>& c, int64_t ldc) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::hemm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -1011,10 +1011,10 @@ template <typename Func, typename T>
 inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n,
                  int64_t k, T alpha, sycl::buffer<T, 1>& a, int64_t lda, T beta,
                  sycl::buffer<T, 1>& c, int64_t ldc) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::syrk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc);
 }
@@ -1037,10 +1037,10 @@ template <typename Func, typename DataType, typename ScalarType>
 inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n,
                  int64_t k, ScalarType alpha, sycl::buffer<DataType, 1>& a, int64_t lda,
                  ScalarType beta, sycl::buffer<DataType, 1>& c, int64_t ldc) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::conjtrans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::conjtrans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::herk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc);
 }
@@ -1061,10 +1061,10 @@ template <typename Func, typename T>
 inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n,
                   int64_t k, T alpha, sycl::buffer<T, 1>& a, int64_t lda, sycl::buffer<T, 1>& b,
                   int64_t ldb, T beta, sycl::buffer<T, 1>& c, int64_t ldc) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     column_major::syr2k(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, b, ldb, beta, c,
                         ldc);
@@ -1090,10 +1090,10 @@ inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose tra
                   int64_t k, DataType alpha, sycl::buffer<DataType, 1>& a, int64_t lda,
                   sycl::buffer<DataType, 1>& b, int64_t ldb, ScalarType beta,
                   sycl::buffer<DataType, 1>& c, int64_t ldc) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::conjtrans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::conjtrans
+                                                                : oneapi::math::transpose::nontrans;
     auto new_alpha = std::conj(alpha);
 
     column_major::her2k(func, queue, new_uplo, new_trans, n, k, new_alpha, a, lda, b, ldb, beta, c,
@@ -1122,10 +1122,10 @@ template <typename Func, typename T>
 inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer<T, 1>& a, int64_t lda,
                  sycl::buffer<T, 1>& b, int64_t ldb) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::trmm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b,
                        ldb);
@@ -1150,10 +1150,10 @@ template <typename Func, typename T>
 inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans,
                  diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer<T, 1>& a, int64_t lda,
                  sycl::buffer<T, 1>& b, int64_t ldb) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     column_major::trsm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b,
                        ldb);
@@ -1246,10 +1246,10 @@ template <typename Func, typename T>
 inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m,
                         int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb,
                         T beta, T* c, int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::symm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c,
                               ldc, dependencies);
@@ -1275,10 +1275,10 @@ template <typename Func, typename T>
 inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m,
                         int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb,
                         T beta, T* c, int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::hemm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c,
                               ldc, dependencies);
@@ -1302,10 +1302,10 @@ template <typename Func, typename T>
 inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n,
                         int64_t k, T alpha, const T* a, int64_t lda, T beta, T* c, int64_t ldc,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::syrk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc,
                               dependencies);
@@ -1331,10 +1331,10 @@ inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpo
                         int64_t k, const ScalarType alpha, const DataType* a, int64_t lda,
                         const ScalarType beta, DataType* c, int64_t ldc,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::conjtrans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::conjtrans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::herk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc,
                               dependencies);
@@ -1359,10 +1359,10 @@ inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transp
                          int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* b,
                          int64_t ldb, T beta, T* c, int64_t ldc,
                          const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans
+                                                                : oneapi::math::transpose::nontrans;
 
     return column_major::syr2k(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, b, ldb, beta,
                                c, ldc, dependencies);
@@ -1389,10 +1389,10 @@ inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transp
                          int64_t n, int64_t k, const DataType alpha, const DataType* a, int64_t lda,
                          const DataType* b, int64_t ldb, const ScalarType beta, DataType* c,
                          int64_t ldc, const std::vector<sycl::event>& dependencies) {
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
-    auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::conjtrans
-                                                               : oneapi::mkl::transpose::nontrans;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
+    auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::conjtrans
+                                                                : oneapi::math::transpose::nontrans;
     auto new_alpha = std::conj(alpha);
 
     return column_major::her2k(func, queue, new_uplo, new_trans, n, k, new_alpha, a, lda, b, ldb,
@@ -1422,10 +1422,10 @@ inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upp
                         transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a,
                         int64_t lda, T* b, int64_t ldb,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::trmm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a,
                               lda, b, ldb, dependencies);
@@ -1451,10 +1451,10 @@ inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upp
                         transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a,
                         int64_t lda, T* b, int64_t ldb,
                         const std::vector<sycl::event>& dependencies) {
-    auto new_side =
-        left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left;
-    auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper
-                                                            : oneapi::mkl::uplo::lower;
+    auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right
+                                                           : oneapi::math::side::left;
+    auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper
+                                                             : oneapi::math::uplo::lower;
 
     return column_major::trsm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a,
                               lda, b, ldb, dependencies);
@@ -1478,5 +1478,5 @@ TRSM_LAUNCHER_USM(std::complex<double>, rocblas_ztrsm)
 } // namespace row_major
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.cpp b/src/blas/backends/rocblas/rocblas_scope_handle.cpp
index 9574271b4..8b59e3f99 100644
--- a/src/blas/backends/rocblas/rocblas_scope_handle.cpp
+++ b/src/blas/backends/rocblas/rocblas_scope_handle.cpp
@@ -20,7 +20,7 @@
 #include "rocblas_scope_handle.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -50,7 +50,7 @@ rocblas_handle_container<T>::~rocblas_handle_container() noexcept(false) {
  * takes place if no other element in the container has a key equivalent to
  * the one being emplaced (keys in a map container are unique).
  */
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
 thread_local rocblas_handle_container<ur_context_handle_t>
     RocblasScopedContextHandler::handle_helper = rocblas_handle_container<ur_context_handle_t>{};
 #else
@@ -113,7 +113,7 @@ rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue& queue)
     hipError_t hipErr;
     hipCtx_t desired;
     HIP_ERROR_FUNC(hipDevicePrimaryCtxRetain, hipErr, &desired, hipDevice);
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     auto piPlacedContext_ = reinterpret_cast<ur_context_handle_t>(desired);
 #else
     auto piPlacedContext_ = reinterpret_cast<pi_context>(desired);
@@ -163,5 +163,5 @@ sycl::context RocblasScopedContextHandler::get_context(const sycl::queue& queue)
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.hpp b/src/blas/backends/rocblas/rocblas_scope_handle.hpp
index 7b2438a54..f5dbd6f23 100644
--- a/src/blas/backends/rocblas/rocblas_scope_handle.hpp
+++ b/src/blas/backends/rocblas/rocblas_scope_handle.hpp
@@ -29,8 +29,8 @@
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -39,7 +39,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -55,7 +55,7 @@ class RocblasScopedContextHandler {
     sycl::context* placedContext_;
     bool needToRecover_;
     sycl::interop_handle& interop_h;
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     static thread_local rocblas_handle_container<ur_context_handle_t> handle_helper;
 #else
     static thread_local rocblas_handle_container<pi_context> handle_helper;
@@ -79,6 +79,6 @@ class RocblasScopedContextHandler {
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //_ROCBLAS_SCOPED_HANDLE_HPP_
diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp
index 64d883b52..5ac12ca0e 100644
--- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp
+++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp
@@ -22,7 +22,7 @@
 #include "rocblas_scope_handle_hipsycl.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -90,5 +90,5 @@ hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue& queue) {
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
\ No newline at end of file
diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp
index 07d0d8292..48dfd433b 100644
--- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp
+++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp
@@ -30,7 +30,7 @@
 #include <unordered_map>
 #include "rocblas_helper.hpp"
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -61,6 +61,6 @@ class RocblasScopedContextHandler {
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //_ROCBLAS_SCOPED_HANDLE_HPP_
diff --git a/src/blas/backends/rocblas/rocblas_task.hpp b/src/blas/backends/rocblas/rocblas_task.hpp
index a855d9a72..ae48720c9 100644
--- a/src/blas/backends/rocblas/rocblas_task.hpp
+++ b/src/blas/backends/rocblas/rocblas_task.hpp
@@ -27,7 +27,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #ifndef __HIPSYCL__
 #include "rocblas_scope_handle.hpp"
 #else
@@ -37,8 +37,8 @@
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -47,7 +47,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace rocblas {
 
@@ -73,12 +73,12 @@ static inline void host_task_internal(H& cgh, sycl::queue queue, F f) {
 }
 #endif
 template <typename H, typename F>
-static inline void onemkl_rocblas_host_task(H& cgh, sycl::queue queue, F f) {
+static inline void onemath_rocblas_host_task(H& cgh, sycl::queue queue, F f) {
     (void)host_task_internal(cgh, queue, f);
 }
 
 } // namespace rocblas
 } // namespace blas
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif // _ROCBLAS_TASK_HPP_
diff --git a/src/blas/backends/rocblas/rocblas_wrappers.cpp b/src/blas/backends/rocblas/rocblas_wrappers.cpp
index ce4c92da5..111944283 100644
--- a/src/blas/backends/rocblas/rocblas_wrappers.cpp
+++ b/src/blas/backends/rocblas/rocblas_wrappers.cpp
@@ -19,990 +19,990 @@
 *
 **************************************************************************/
 #include "blas/function_table.hpp"
-#include "oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hpp"
+#include "oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" blas_function_table_t mkl_blas_table = {
+extern "C" blas_function_table_t onemath_blas_table = {
     WRAPPER_VERSION,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dotc,
-    oneapi::mkl::blas::rocblas::column_major::dotc,
-    oneapi::mkl::blas::rocblas::column_major::dotu,
-    oneapi::mkl::blas::rocblas::column_major::dotu,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotm,
-    oneapi::mkl::blas::rocblas::column_major::rotm,
-    oneapi::mkl::blas::rocblas::column_major::rotmg,
-    oneapi::mkl::blas::rocblas::column_major::rotmg,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::sdsdot,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::ger,
-    oneapi::mkl::blas::rocblas::column_major::ger,
-    oneapi::mkl::blas::rocblas::column_major::gerc,
-    oneapi::mkl::blas::rocblas::column_major::gerc,
-    oneapi::mkl::blas::rocblas::column_major::geru,
-    oneapi::mkl::blas::rocblas::column_major::geru,
-    oneapi::mkl::blas::rocblas::column_major::hbmv,
-    oneapi::mkl::blas::rocblas::column_major::hbmv,
-    oneapi::mkl::blas::rocblas::column_major::hemv,
-    oneapi::mkl::blas::rocblas::column_major::hemv,
-    oneapi::mkl::blas::rocblas::column_major::her,
-    oneapi::mkl::blas::rocblas::column_major::her,
-    oneapi::mkl::blas::rocblas::column_major::her2,
-    oneapi::mkl::blas::rocblas::column_major::her2,
-    oneapi::mkl::blas::rocblas::column_major::hpmv,
-    oneapi::mkl::blas::rocblas::column_major::hpmv,
-    oneapi::mkl::blas::rocblas::column_major::hpr,
-    oneapi::mkl::blas::rocblas::column_major::hpr,
-    oneapi::mkl::blas::rocblas::column_major::hpr2,
-    oneapi::mkl::blas::rocblas::column_major::hpr2,
-    oneapi::mkl::blas::rocblas::column_major::sbmv,
-    oneapi::mkl::blas::rocblas::column_major::sbmv,
-    oneapi::mkl::blas::rocblas::column_major::spmv,
-    oneapi::mkl::blas::rocblas::column_major::spmv,
-    oneapi::mkl::blas::rocblas::column_major::spr,
-    oneapi::mkl::blas::rocblas::column_major::spr,
-    oneapi::mkl::blas::rocblas::column_major::spr2,
-    oneapi::mkl::blas::rocblas::column_major::spr2,
-    oneapi::mkl::blas::rocblas::column_major::symv,
-    oneapi::mkl::blas::rocblas::column_major::symv,
-    oneapi::mkl::blas::rocblas::column_major::syr,
-    oneapi::mkl::blas::rocblas::column_major::syr,
-    oneapi::mkl::blas::rocblas::column_major::syr2,
-    oneapi::mkl::blas::rocblas::column_major::syr2,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::hemm,
-    oneapi::mkl::blas::rocblas::column_major::hemm,
-    oneapi::mkl::blas::rocblas::column_major::herk,
-    oneapi::mkl::blas::rocblas::column_major::herk,
-    oneapi::mkl::blas::rocblas::column_major::her2k,
-    oneapi::mkl::blas::rocblas::column_major::her2k,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::asum,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::axpby,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::copy_batch,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dot,
-    oneapi::mkl::blas::rocblas::column_major::dotc,
-    oneapi::mkl::blas::rocblas::column_major::dotc,
-    oneapi::mkl::blas::rocblas::column_major::dotu,
-    oneapi::mkl::blas::rocblas::column_major::dotu,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamin,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::iamax,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::nrm2,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rot,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotg,
-    oneapi::mkl::blas::rocblas::column_major::rotm,
-    oneapi::mkl::blas::rocblas::column_major::rotm,
-    oneapi::mkl::blas::rocblas::column_major::rotmg,
-    oneapi::mkl::blas::rocblas::column_major::rotmg,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::scal,
-    oneapi::mkl::blas::rocblas::column_major::sdsdot,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::swap,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gbmv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::column_major::ger,
-    oneapi::mkl::blas::rocblas::column_major::ger,
-    oneapi::mkl::blas::rocblas::column_major::gerc,
-    oneapi::mkl::blas::rocblas::column_major::gerc,
-    oneapi::mkl::blas::rocblas::column_major::geru,
-    oneapi::mkl::blas::rocblas::column_major::geru,
-    oneapi::mkl::blas::rocblas::column_major::hbmv,
-    oneapi::mkl::blas::rocblas::column_major::hbmv,
-    oneapi::mkl::blas::rocblas::column_major::hemv,
-    oneapi::mkl::blas::rocblas::column_major::hemv,
-    oneapi::mkl::blas::rocblas::column_major::her,
-    oneapi::mkl::blas::rocblas::column_major::her,
-    oneapi::mkl::blas::rocblas::column_major::her2,
-    oneapi::mkl::blas::rocblas::column_major::her2,
-    oneapi::mkl::blas::rocblas::column_major::hpmv,
-    oneapi::mkl::blas::rocblas::column_major::hpmv,
-    oneapi::mkl::blas::rocblas::column_major::hpr,
-    oneapi::mkl::blas::rocblas::column_major::hpr,
-    oneapi::mkl::blas::rocblas::column_major::hpr2,
-    oneapi::mkl::blas::rocblas::column_major::hpr2,
-    oneapi::mkl::blas::rocblas::column_major::sbmv,
-    oneapi::mkl::blas::rocblas::column_major::sbmv,
-    oneapi::mkl::blas::rocblas::column_major::spmv,
-    oneapi::mkl::blas::rocblas::column_major::spmv,
-    oneapi::mkl::blas::rocblas::column_major::spr,
-    oneapi::mkl::blas::rocblas::column_major::spr,
-    oneapi::mkl::blas::rocblas::column_major::spr2,
-    oneapi::mkl::blas::rocblas::column_major::spr2,
-    oneapi::mkl::blas::rocblas::column_major::symv,
-    oneapi::mkl::blas::rocblas::column_major::symv,
-    oneapi::mkl::blas::rocblas::column_major::syr,
-    oneapi::mkl::blas::rocblas::column_major::syr,
-    oneapi::mkl::blas::rocblas::column_major::syr2,
-    oneapi::mkl::blas::rocblas::column_major::syr2,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbmv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tbsv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpmv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::tpsv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trmv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::trsv,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::gemm,
-    oneapi::mkl::blas::rocblas::column_major::hemm,
-    oneapi::mkl::blas::rocblas::column_major::hemm,
-    oneapi::mkl::blas::rocblas::column_major::herk,
-    oneapi::mkl::blas::rocblas::column_major::herk,
-    oneapi::mkl::blas::rocblas::column_major::her2k,
-    oneapi::mkl::blas::rocblas::column_major::her2k,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::symm,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::syr2k,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trmm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemmt,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatadd,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::column_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dotc,
-    oneapi::mkl::blas::rocblas::row_major::dotc,
-    oneapi::mkl::blas::rocblas::row_major::dotu,
-    oneapi::mkl::blas::rocblas::row_major::dotu,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotm,
-    oneapi::mkl::blas::rocblas::row_major::rotm,
-    oneapi::mkl::blas::rocblas::row_major::rotmg,
-    oneapi::mkl::blas::rocblas::row_major::rotmg,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::sdsdot,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::ger,
-    oneapi::mkl::blas::rocblas::row_major::ger,
-    oneapi::mkl::blas::rocblas::row_major::gerc,
-    oneapi::mkl::blas::rocblas::row_major::gerc,
-    oneapi::mkl::blas::rocblas::row_major::geru,
-    oneapi::mkl::blas::rocblas::row_major::geru,
-    oneapi::mkl::blas::rocblas::row_major::hbmv,
-    oneapi::mkl::blas::rocblas::row_major::hbmv,
-    oneapi::mkl::blas::rocblas::row_major::hemv,
-    oneapi::mkl::blas::rocblas::row_major::hemv,
-    oneapi::mkl::blas::rocblas::row_major::her,
-    oneapi::mkl::blas::rocblas::row_major::her,
-    oneapi::mkl::blas::rocblas::row_major::her2,
-    oneapi::mkl::blas::rocblas::row_major::her2,
-    oneapi::mkl::blas::rocblas::row_major::hpmv,
-    oneapi::mkl::blas::rocblas::row_major::hpmv,
-    oneapi::mkl::blas::rocblas::row_major::hpr,
-    oneapi::mkl::blas::rocblas::row_major::hpr,
-    oneapi::mkl::blas::rocblas::row_major::hpr2,
-    oneapi::mkl::blas::rocblas::row_major::hpr2,
-    oneapi::mkl::blas::rocblas::row_major::sbmv,
-    oneapi::mkl::blas::rocblas::row_major::sbmv,
-    oneapi::mkl::blas::rocblas::row_major::spmv,
-    oneapi::mkl::blas::rocblas::row_major::spmv,
-    oneapi::mkl::blas::rocblas::row_major::spr,
-    oneapi::mkl::blas::rocblas::row_major::spr,
-    oneapi::mkl::blas::rocblas::row_major::spr2,
-    oneapi::mkl::blas::rocblas::row_major::spr2,
-    oneapi::mkl::blas::rocblas::row_major::symv,
-    oneapi::mkl::blas::rocblas::row_major::symv,
-    oneapi::mkl::blas::rocblas::row_major::syr,
-    oneapi::mkl::blas::rocblas::row_major::syr,
-    oneapi::mkl::blas::rocblas::row_major::syr2,
-    oneapi::mkl::blas::rocblas::row_major::syr2,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::hemm,
-    oneapi::mkl::blas::rocblas::row_major::hemm,
-    oneapi::mkl::blas::rocblas::row_major::herk,
-    oneapi::mkl::blas::rocblas::row_major::herk,
-    oneapi::mkl::blas::rocblas::row_major::her2k,
-    oneapi::mkl::blas::rocblas::row_major::her2k,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::asum,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpy_batch,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::axpby,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::copy_batch,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dot,
-    oneapi::mkl::blas::rocblas::row_major::dotc,
-    oneapi::mkl::blas::rocblas::row_major::dotc,
-    oneapi::mkl::blas::rocblas::row_major::dotu,
-    oneapi::mkl::blas::rocblas::row_major::dotu,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamin,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::iamax,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::nrm2,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rot,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotg,
-    oneapi::mkl::blas::rocblas::row_major::rotm,
-    oneapi::mkl::blas::rocblas::row_major::rotm,
-    oneapi::mkl::blas::rocblas::row_major::rotmg,
-    oneapi::mkl::blas::rocblas::row_major::rotmg,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::scal,
-    oneapi::mkl::blas::rocblas::row_major::sdsdot,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::swap,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gbmv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemv_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::dgmm_batch,
-    oneapi::mkl::blas::rocblas::row_major::ger,
-    oneapi::mkl::blas::rocblas::row_major::ger,
-    oneapi::mkl::blas::rocblas::row_major::gerc,
-    oneapi::mkl::blas::rocblas::row_major::gerc,
-    oneapi::mkl::blas::rocblas::row_major::geru,
-    oneapi::mkl::blas::rocblas::row_major::geru,
-    oneapi::mkl::blas::rocblas::row_major::hbmv,
-    oneapi::mkl::blas::rocblas::row_major::hbmv,
-    oneapi::mkl::blas::rocblas::row_major::hemv,
-    oneapi::mkl::blas::rocblas::row_major::hemv,
-    oneapi::mkl::blas::rocblas::row_major::her,
-    oneapi::mkl::blas::rocblas::row_major::her,
-    oneapi::mkl::blas::rocblas::row_major::her2,
-    oneapi::mkl::blas::rocblas::row_major::her2,
-    oneapi::mkl::blas::rocblas::row_major::hpmv,
-    oneapi::mkl::blas::rocblas::row_major::hpmv,
-    oneapi::mkl::blas::rocblas::row_major::hpr,
-    oneapi::mkl::blas::rocblas::row_major::hpr,
-    oneapi::mkl::blas::rocblas::row_major::hpr2,
-    oneapi::mkl::blas::rocblas::row_major::hpr2,
-    oneapi::mkl::blas::rocblas::row_major::sbmv,
-    oneapi::mkl::blas::rocblas::row_major::sbmv,
-    oneapi::mkl::blas::rocblas::row_major::spmv,
-    oneapi::mkl::blas::rocblas::row_major::spmv,
-    oneapi::mkl::blas::rocblas::row_major::spr,
-    oneapi::mkl::blas::rocblas::row_major::spr,
-    oneapi::mkl::blas::rocblas::row_major::spr2,
-    oneapi::mkl::blas::rocblas::row_major::spr2,
-    oneapi::mkl::blas::rocblas::row_major::symv,
-    oneapi::mkl::blas::rocblas::row_major::symv,
-    oneapi::mkl::blas::rocblas::row_major::syr,
-    oneapi::mkl::blas::rocblas::row_major::syr,
-    oneapi::mkl::blas::rocblas::row_major::syr2,
-    oneapi::mkl::blas::rocblas::row_major::syr2,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbmv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tbsv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpmv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::tpsv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trmv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::trsv,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::gemm,
-    oneapi::mkl::blas::rocblas::row_major::hemm,
-    oneapi::mkl::blas::rocblas::row_major::hemm,
-    oneapi::mkl::blas::rocblas::row_major::herk,
-    oneapi::mkl::blas::rocblas::row_major::herk,
-    oneapi::mkl::blas::rocblas::row_major::her2k,
-    oneapi::mkl::blas::rocblas::row_major::her2k,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::symm,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syrk_batch,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::syr2k,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trmm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::trsm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemm_batch,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemmt,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::gemm_bias,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatadd_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy2,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatadd,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::omatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
-    oneapi::mkl::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dotc,
+    oneapi::math::blas::rocblas::column_major::dotc,
+    oneapi::math::blas::rocblas::column_major::dotu,
+    oneapi::math::blas::rocblas::column_major::dotu,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotm,
+    oneapi::math::blas::rocblas::column_major::rotm,
+    oneapi::math::blas::rocblas::column_major::rotmg,
+    oneapi::math::blas::rocblas::column_major::rotmg,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::sdsdot,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::ger,
+    oneapi::math::blas::rocblas::column_major::ger,
+    oneapi::math::blas::rocblas::column_major::gerc,
+    oneapi::math::blas::rocblas::column_major::gerc,
+    oneapi::math::blas::rocblas::column_major::geru,
+    oneapi::math::blas::rocblas::column_major::geru,
+    oneapi::math::blas::rocblas::column_major::hbmv,
+    oneapi::math::blas::rocblas::column_major::hbmv,
+    oneapi::math::blas::rocblas::column_major::hemv,
+    oneapi::math::blas::rocblas::column_major::hemv,
+    oneapi::math::blas::rocblas::column_major::her,
+    oneapi::math::blas::rocblas::column_major::her,
+    oneapi::math::blas::rocblas::column_major::her2,
+    oneapi::math::blas::rocblas::column_major::her2,
+    oneapi::math::blas::rocblas::column_major::hpmv,
+    oneapi::math::blas::rocblas::column_major::hpmv,
+    oneapi::math::blas::rocblas::column_major::hpr,
+    oneapi::math::blas::rocblas::column_major::hpr,
+    oneapi::math::blas::rocblas::column_major::hpr2,
+    oneapi::math::blas::rocblas::column_major::hpr2,
+    oneapi::math::blas::rocblas::column_major::sbmv,
+    oneapi::math::blas::rocblas::column_major::sbmv,
+    oneapi::math::blas::rocblas::column_major::spmv,
+    oneapi::math::blas::rocblas::column_major::spmv,
+    oneapi::math::blas::rocblas::column_major::spr,
+    oneapi::math::blas::rocblas::column_major::spr,
+    oneapi::math::blas::rocblas::column_major::spr2,
+    oneapi::math::blas::rocblas::column_major::spr2,
+    oneapi::math::blas::rocblas::column_major::symv,
+    oneapi::math::blas::rocblas::column_major::symv,
+    oneapi::math::blas::rocblas::column_major::syr,
+    oneapi::math::blas::rocblas::column_major::syr,
+    oneapi::math::blas::rocblas::column_major::syr2,
+    oneapi::math::blas::rocblas::column_major::syr2,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::hemm,
+    oneapi::math::blas::rocblas::column_major::hemm,
+    oneapi::math::blas::rocblas::column_major::herk,
+    oneapi::math::blas::rocblas::column_major::herk,
+    oneapi::math::blas::rocblas::column_major::her2k,
+    oneapi::math::blas::rocblas::column_major::her2k,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::asum,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpy_batch,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::axpby,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::copy_batch,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dot,
+    oneapi::math::blas::rocblas::column_major::dotc,
+    oneapi::math::blas::rocblas::column_major::dotc,
+    oneapi::math::blas::rocblas::column_major::dotu,
+    oneapi::math::blas::rocblas::column_major::dotu,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamin,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::iamax,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::nrm2,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rot,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotg,
+    oneapi::math::blas::rocblas::column_major::rotm,
+    oneapi::math::blas::rocblas::column_major::rotm,
+    oneapi::math::blas::rocblas::column_major::rotmg,
+    oneapi::math::blas::rocblas::column_major::rotmg,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::scal,
+    oneapi::math::blas::rocblas::column_major::sdsdot,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::swap,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gbmv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::gemv_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::dgmm_batch,
+    oneapi::math::blas::rocblas::column_major::ger,
+    oneapi::math::blas::rocblas::column_major::ger,
+    oneapi::math::blas::rocblas::column_major::gerc,
+    oneapi::math::blas::rocblas::column_major::gerc,
+    oneapi::math::blas::rocblas::column_major::geru,
+    oneapi::math::blas::rocblas::column_major::geru,
+    oneapi::math::blas::rocblas::column_major::hbmv,
+    oneapi::math::blas::rocblas::column_major::hbmv,
+    oneapi::math::blas::rocblas::column_major::hemv,
+    oneapi::math::blas::rocblas::column_major::hemv,
+    oneapi::math::blas::rocblas::column_major::her,
+    oneapi::math::blas::rocblas::column_major::her,
+    oneapi::math::blas::rocblas::column_major::her2,
+    oneapi::math::blas::rocblas::column_major::her2,
+    oneapi::math::blas::rocblas::column_major::hpmv,
+    oneapi::math::blas::rocblas::column_major::hpmv,
+    oneapi::math::blas::rocblas::column_major::hpr,
+    oneapi::math::blas::rocblas::column_major::hpr,
+    oneapi::math::blas::rocblas::column_major::hpr2,
+    oneapi::math::blas::rocblas::column_major::hpr2,
+    oneapi::math::blas::rocblas::column_major::sbmv,
+    oneapi::math::blas::rocblas::column_major::sbmv,
+    oneapi::math::blas::rocblas::column_major::spmv,
+    oneapi::math::blas::rocblas::column_major::spmv,
+    oneapi::math::blas::rocblas::column_major::spr,
+    oneapi::math::blas::rocblas::column_major::spr,
+    oneapi::math::blas::rocblas::column_major::spr2,
+    oneapi::math::blas::rocblas::column_major::spr2,
+    oneapi::math::blas::rocblas::column_major::symv,
+    oneapi::math::blas::rocblas::column_major::symv,
+    oneapi::math::blas::rocblas::column_major::syr,
+    oneapi::math::blas::rocblas::column_major::syr,
+    oneapi::math::blas::rocblas::column_major::syr2,
+    oneapi::math::blas::rocblas::column_major::syr2,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbmv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tbsv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpmv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::tpsv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trmv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::trsv,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::gemm,
+    oneapi::math::blas::rocblas::column_major::hemm,
+    oneapi::math::blas::rocblas::column_major::hemm,
+    oneapi::math::blas::rocblas::column_major::herk,
+    oneapi::math::blas::rocblas::column_major::herk,
+    oneapi::math::blas::rocblas::column_major::her2k,
+    oneapi::math::blas::rocblas::column_major::her2k,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::symm,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syrk_batch,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::syr2k,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trmm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::trsm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemm_batch,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemmt,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::gemm_bias,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatadd_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::omatcopy2,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::imatcopy,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatadd,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::column_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dotc,
+    oneapi::math::blas::rocblas::row_major::dotc,
+    oneapi::math::blas::rocblas::row_major::dotu,
+    oneapi::math::blas::rocblas::row_major::dotu,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotm,
+    oneapi::math::blas::rocblas::row_major::rotm,
+    oneapi::math::blas::rocblas::row_major::rotmg,
+    oneapi::math::blas::rocblas::row_major::rotmg,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::sdsdot,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::ger,
+    oneapi::math::blas::rocblas::row_major::ger,
+    oneapi::math::blas::rocblas::row_major::gerc,
+    oneapi::math::blas::rocblas::row_major::gerc,
+    oneapi::math::blas::rocblas::row_major::geru,
+    oneapi::math::blas::rocblas::row_major::geru,
+    oneapi::math::blas::rocblas::row_major::hbmv,
+    oneapi::math::blas::rocblas::row_major::hbmv,
+    oneapi::math::blas::rocblas::row_major::hemv,
+    oneapi::math::blas::rocblas::row_major::hemv,
+    oneapi::math::blas::rocblas::row_major::her,
+    oneapi::math::blas::rocblas::row_major::her,
+    oneapi::math::blas::rocblas::row_major::her2,
+    oneapi::math::blas::rocblas::row_major::her2,
+    oneapi::math::blas::rocblas::row_major::hpmv,
+    oneapi::math::blas::rocblas::row_major::hpmv,
+    oneapi::math::blas::rocblas::row_major::hpr,
+    oneapi::math::blas::rocblas::row_major::hpr,
+    oneapi::math::blas::rocblas::row_major::hpr2,
+    oneapi::math::blas::rocblas::row_major::hpr2,
+    oneapi::math::blas::rocblas::row_major::sbmv,
+    oneapi::math::blas::rocblas::row_major::sbmv,
+    oneapi::math::blas::rocblas::row_major::spmv,
+    oneapi::math::blas::rocblas::row_major::spmv,
+    oneapi::math::blas::rocblas::row_major::spr,
+    oneapi::math::blas::rocblas::row_major::spr,
+    oneapi::math::blas::rocblas::row_major::spr2,
+    oneapi::math::blas::rocblas::row_major::spr2,
+    oneapi::math::blas::rocblas::row_major::symv,
+    oneapi::math::blas::rocblas::row_major::symv,
+    oneapi::math::blas::rocblas::row_major::syr,
+    oneapi::math::blas::rocblas::row_major::syr,
+    oneapi::math::blas::rocblas::row_major::syr2,
+    oneapi::math::blas::rocblas::row_major::syr2,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::hemm,
+    oneapi::math::blas::rocblas::row_major::hemm,
+    oneapi::math::blas::rocblas::row_major::herk,
+    oneapi::math::blas::rocblas::row_major::herk,
+    oneapi::math::blas::rocblas::row_major::her2k,
+    oneapi::math::blas::rocblas::row_major::her2k,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::asum,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpy_batch,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::axpby,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::copy_batch,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dot,
+    oneapi::math::blas::rocblas::row_major::dotc,
+    oneapi::math::blas::rocblas::row_major::dotc,
+    oneapi::math::blas::rocblas::row_major::dotu,
+    oneapi::math::blas::rocblas::row_major::dotu,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamin,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::iamax,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::nrm2,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rot,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotg,
+    oneapi::math::blas::rocblas::row_major::rotm,
+    oneapi::math::blas::rocblas::row_major::rotm,
+    oneapi::math::blas::rocblas::row_major::rotmg,
+    oneapi::math::blas::rocblas::row_major::rotmg,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::scal,
+    oneapi::math::blas::rocblas::row_major::sdsdot,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::swap,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gbmv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::gemv_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::dgmm_batch,
+    oneapi::math::blas::rocblas::row_major::ger,
+    oneapi::math::blas::rocblas::row_major::ger,
+    oneapi::math::blas::rocblas::row_major::gerc,
+    oneapi::math::blas::rocblas::row_major::gerc,
+    oneapi::math::blas::rocblas::row_major::geru,
+    oneapi::math::blas::rocblas::row_major::geru,
+    oneapi::math::blas::rocblas::row_major::hbmv,
+    oneapi::math::blas::rocblas::row_major::hbmv,
+    oneapi::math::blas::rocblas::row_major::hemv,
+    oneapi::math::blas::rocblas::row_major::hemv,
+    oneapi::math::blas::rocblas::row_major::her,
+    oneapi::math::blas::rocblas::row_major::her,
+    oneapi::math::blas::rocblas::row_major::her2,
+    oneapi::math::blas::rocblas::row_major::her2,
+    oneapi::math::blas::rocblas::row_major::hpmv,
+    oneapi::math::blas::rocblas::row_major::hpmv,
+    oneapi::math::blas::rocblas::row_major::hpr,
+    oneapi::math::blas::rocblas::row_major::hpr,
+    oneapi::math::blas::rocblas::row_major::hpr2,
+    oneapi::math::blas::rocblas::row_major::hpr2,
+    oneapi::math::blas::rocblas::row_major::sbmv,
+    oneapi::math::blas::rocblas::row_major::sbmv,
+    oneapi::math::blas::rocblas::row_major::spmv,
+    oneapi::math::blas::rocblas::row_major::spmv,
+    oneapi::math::blas::rocblas::row_major::spr,
+    oneapi::math::blas::rocblas::row_major::spr,
+    oneapi::math::blas::rocblas::row_major::spr2,
+    oneapi::math::blas::rocblas::row_major::spr2,
+    oneapi::math::blas::rocblas::row_major::symv,
+    oneapi::math::blas::rocblas::row_major::symv,
+    oneapi::math::blas::rocblas::row_major::syr,
+    oneapi::math::blas::rocblas::row_major::syr,
+    oneapi::math::blas::rocblas::row_major::syr2,
+    oneapi::math::blas::rocblas::row_major::syr2,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbmv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tbsv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpmv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::tpsv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trmv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::trsv,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::gemm,
+    oneapi::math::blas::rocblas::row_major::hemm,
+    oneapi::math::blas::rocblas::row_major::hemm,
+    oneapi::math::blas::rocblas::row_major::herk,
+    oneapi::math::blas::rocblas::row_major::herk,
+    oneapi::math::blas::rocblas::row_major::her2k,
+    oneapi::math::blas::rocblas::row_major::her2k,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::symm,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syrk_batch,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::syr2k,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trmm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::trsm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemm_batch,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemmt,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::gemm_bias,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatadd_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::omatcopy2,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::imatcopy,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatadd,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::omatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
+    oneapi::math::blas::rocblas::row_major::imatcopy_batch,
 };
diff --git a/src/blas/blas_loader.cpp b/src/blas/blas_loader.cpp
index e2276c0fe..33c3ca185 100644
--- a/src/blas/blas_loader.cpp
+++ b/src/blas/blas_loader.cpp
@@ -17,68 +17,68 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/blas/detail/blas_loader.hpp"
+#include "oneapi/math/blas/detail/blas_loader.hpp"
 
 #include "function_table_initializer.hpp"
 #include "blas/function_table.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace blas {
 namespace column_major {
 namespace detail {
 
-static oneapi::mkl::detail::table_initializer<domain::blas, blas_function_table_t> function_tables;
+static oneapi::math::detail::table_initializer<domain::blas, blas_function_table_t> function_tables;
 
 // Buffer APIs
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_scasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_dzasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result) {
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_sasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_dasum_sycl(queue, n, x, incx, result);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -86,7 +86,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -94,7 +94,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
@@ -102,7 +102,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
@@ -110,58 +110,59 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                                 incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                                 incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                                 incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                                 incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_scopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_dcopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_ccopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_zcopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -169,7 +170,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -177,7 +178,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -185,7 +186,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -193,247 +194,249 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_sdot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
-         sycl::buffer<double, 1>& result) {
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+         std::int64_t incy, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_ddot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
     function_tables[{ libkey, queue }].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
     function_tables[{ libkey, queue }].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
     function_tables[{ libkey, queue }].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
     function_tables[{ libkey, queue }].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_isamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_idamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_icamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_izamin_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_isamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_idamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_icamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].column_major_izamax_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_scnrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_dznrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result) {
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_snrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].column_major_dnrm2_sycl(queue, n, x, incx, result);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
     function_tables[{ libkey, queue }].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
     function_tables[{ libkey, queue }].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
     function_tables[{ libkey, queue }].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+         std::int64_t incy, double c, double s) {
     function_tables[{ libkey, queue }].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
     function_tables[{ libkey, queue }].column_major_srotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
     function_tables[{ libkey, queue }].column_major_drotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
     function_tables[{ libkey, queue }].column_major_crotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
     function_tables[{ libkey, queue }].column_major_zrotg_sycl(queue, a, b, c, s);
 }
 
-void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-          sycl::buffer<float, 1>& param) {
+void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy, sycl::buffer<float, 1>& param) {
     function_tables[{ libkey, queue }].column_major_srotm_sycl(queue, n, x, incx, y, incy, param);
 }
 
-void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& param) {
     function_tables[{ libkey, queue }].column_major_drotm_sycl(queue, n, x, incx, y, incy, param);
 }
 
-void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& d1,
+void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
     function_tables[{ libkey, queue }].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param);
 }
 
-void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& d1,
+void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
     function_tables[{ libkey, queue }].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_sscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_cscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_csscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_zscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_zdscal_sycl(queue, n, alpha, x, incx);
 }
 
-void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb,
+void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy,
                                                                 result);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_sswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_dswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_cswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_zswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
@@ -441,7 +444,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
@@ -449,7 +452,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
@@ -458,7 +461,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
@@ -467,7 +470,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                lda, x, incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
@@ -475,7 +478,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
@@ -483,7 +486,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -491,7 +494,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -499,7 +502,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                                incx, beta, y, incy);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
@@ -509,7 +512,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
@@ -519,7 +522,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
@@ -530,7 +533,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
@@ -541,7 +544,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -550,7 +553,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -559,7 +562,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
@@ -568,7 +571,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
@@ -577,21 +580,21 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
          std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy,
                                                               a, lda);
 }
 
-void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy,
                                                               a, lda);
 }
 
-void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -599,7 +602,7 @@ void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                                a, lda);
 }
 
-void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -607,7 +610,7 @@ void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                                a, lda);
 }
 
-void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -615,7 +618,7 @@ void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                                a, lda);
 }
 
-void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -623,7 +626,7 @@ void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                                a, lda);
 }
 
-void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -631,7 +634,7 @@ void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                lda, x, incx, beta, y, incy);
 }
 
-void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -639,7 +642,7 @@ void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                lda, x, incx, beta, y, incy);
 }
 
-void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -647,7 +650,7 @@ void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                x, incx, beta, y, incy);
 }
 
-void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -655,21 +658,21 @@ void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                x, incx, beta, y, incy);
 }
 
-void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a, lda);
 }
 
-void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a, lda);
 }
 
-void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -677,7 +680,7 @@ void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, y, incy, a, lda);
 }
 
-void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -685,7 +688,7 @@ void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, y, incy, a, lda);
 }
 
-void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -693,7 +696,7 @@ void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, beta, y, incy);
 }
 
-void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -701,21 +704,21 @@ void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, beta, y, incy);
 }
 
-void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
     function_tables[{ libkey, queue }].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a);
 }
 
-void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
     function_tables[{ libkey, queue }].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a);
 }
 
-void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
@@ -723,7 +726,7 @@ void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, y, incy, a);
 }
 
-void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
@@ -731,7 +734,7 @@ void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                incx, y, incy, a);
 }
 
-void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
@@ -739,7 +742,7 @@ void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                lda, x, incx, beta, y, incy);
 }
 
-void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
@@ -747,257 +750,257 @@ void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                                lda, x, incx, beta, y, incy);
 }
 
-void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x,
                                                                incx, beta, y, incy);
 }
 
-void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x,
                                                                incx, beta, y, incy);
 }
 
-void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
     function_tables[{ libkey, queue }].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a);
 }
 
-void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
     function_tables[{ libkey, queue }].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a);
 }
 
-void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
     function_tables[{ libkey, queue }].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x,
                                                                incx, y, incy, a);
 }
 
-void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
     function_tables[{ libkey, queue }].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x,
                                                                incx, y, incy, a);
 }
 
-void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx, float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda,
                                                                x, incx, beta, y, incy);
 }
 
-void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda,
                                                                x, incx, beta, y, incy);
 }
 
-void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a, lda);
 }
 
-void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                               a, lda);
 }
 
-void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x,
                                                                incx, y, incy, a, lda);
 }
 
-void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x,
                                                                incx, y, incy, a, lda);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, k, a, lda, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag,
                                                                n, a, lda, x, incx);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -1005,7 +1008,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -1013,7 +1016,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -1022,7 +1025,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -1031,7 +1034,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
@@ -1039,7 +1042,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -1047,7 +1050,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -1055,7 +1058,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -1064,7 +1067,7 @@ void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -1073,14 +1076,14 @@ void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].column_major_cherk_sycl(queue, upper_lower, trans, n, k,
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
@@ -1088,7 +1091,7 @@ void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
@@ -1097,7 +1100,7 @@ void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
@@ -1106,7 +1109,7 @@ void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
@@ -1114,7 +1117,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -1122,7 +1125,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -1131,7 +1134,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -1140,21 +1143,21 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                                alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k,
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k,
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
@@ -1162,7 +1165,7 @@ void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
@@ -1170,7 +1173,7 @@ void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                                alpha, a, lda, beta, c, ldc);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
@@ -1179,7 +1182,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
@@ -1188,7 +1191,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
@@ -1198,7 +1201,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
@@ -1208,7 +1211,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
@@ -1216,7 +1219,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -1224,7 +1227,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -1233,7 +1236,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -1242,7 +1245,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
@@ -1250,7 +1253,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
@@ -1258,7 +1261,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
@@ -1266,7 +1269,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
@@ -1274,7 +1277,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
@@ -1282,7 +1285,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
@@ -1290,7 +1293,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
@@ -1298,7 +1301,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
@@ -1306,7 +1309,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
         queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -1317,7 +1320,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, double alpha,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
@@ -1328,7 +1331,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1339,7 +1342,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1350,7 +1353,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1361,7 +1364,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -1372,7 +1375,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1383,7 +1386,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1394,7 +1397,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1404,7 +1407,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1414,7 +1417,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
@@ -1424,7 +1427,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
@@ -1434,7 +1437,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -1442,7 +1445,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -1450,7 +1453,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -1459,7 +1462,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -1468,7 +1471,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
@@ -1477,7 +1480,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
@@ -1486,7 +1489,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
@@ -1495,7 +1498,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
@@ -1504,24 +1507,24 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                    std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size) {
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                    std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                    std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size) {
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                    std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<float> alpha,
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
@@ -1529,8 +1532,8 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<double> alpha,
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
@@ -1538,37 +1541,39 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
-                    std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
+                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                    std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
-                    std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& ab,
+                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                    std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<float> alpha,
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<double> alpha,
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, float alpha,
                    sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a, float beta,
                    sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1579,7 +1584,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, double alpha,
                    sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a, double beta,
                    sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1590,7 +1595,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                    sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
@@ -1601,7 +1606,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                    sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                    std::int64_t stride_a, std::complex<double> beta,
@@ -1613,35 +1618,35 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a,
                                                                    lda, b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a,
                                                                    lda, b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a,
                                                                    lda, b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a,
                                                                    lda, b, ldb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
@@ -1649,7 +1654,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                     lda, stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
@@ -1657,7 +1662,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                     lda, stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
@@ -1665,7 +1670,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                     lda, stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
@@ -1673,35 +1678,35 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                     lda, stridea, b, ldb, strideb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab,
                                                                    lda, ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab,
                                                                    lda, ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab,
                                                                    lda, ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
     function_tables[{ libkey, queue }].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab,
                                                                    lda, ldb);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -1709,7 +1714,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -1717,7 +1722,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, std::complex<float> alpha,
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
@@ -1726,7 +1731,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, std::complex<double> alpha,
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
@@ -1737,47 +1742,47 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
 
 // USM APIs
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_scasum_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dzasum_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sasum_usm_sycl(queue, n, x, incx, result,
                                                                           dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dasum_usm_sycl(queue, n, x, incx, result,
                                                                           dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx,
                                                                           y, incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx,
                                                                           y, incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -1785,7 +1790,7 @@ sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                           y, incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -1793,7 +1798,7 @@ sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                           y, incy, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        float* alpha, const float** x, std::int64_t* incx, float** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1801,7 +1806,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        double* alpha, const double** x, std::int64_t* incx, double** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1809,7 +1814,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        std::complex<float>* alpha, const std::complex<float>** x,
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -1818,7 +1823,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        std::complex<double>* alpha, const std::complex<double>** x,
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -1827,7 +1832,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1835,15 +1840,15 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
-                       const double* x, std::int64_t incx, std::int64_t stridex, double* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                       double alpha, const double* x, std::int64_t incx, std::int64_t stridex,
+                       double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_usm_sycl(
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
@@ -1852,7 +1857,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
@@ -1861,21 +1866,21 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_saxpby_usm_sycl(
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_daxpby_usm_sycl(
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
@@ -1883,7 +1888,7 @@ sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
@@ -1891,35 +1896,35 @@ sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_scopy_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dcopy_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_ccopy_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zcopy_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1927,7 +1932,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1935,7 +1940,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1943,7 +1948,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1951,7 +1956,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1959,7 +1964,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -1967,7 +1972,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -1975,7 +1980,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -1983,28 +1988,28 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy,
                                                                          result, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy,
                                                                          result, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dsdot_usm_sycl(
         queue, n, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -2012,7 +2017,7 @@ sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -2020,7 +2025,7 @@ sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -2028,7 +2033,7 @@ sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -2036,96 +2041,96 @@ sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_isamin_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_idamin_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_icamin_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_izamin_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_isamax_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_idamax_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_icamax_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_izamax_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_scnrm2_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dznrm2_usm_sycl(queue, n, x, incx,
                                                                            result, dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_snrm2_usm_sycl(queue, n, x, incx, result,
                                                                           dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dnrm2_usm_sycl(queue, n, x, incx, result,
                                                                           dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                 std::int64_t incy, float c, float s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_srot_usm_sycl(queue, n, x, incx, y, incy,
                                                                          c, s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                 std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
@@ -2133,149 +2138,149 @@ sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                          c, s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_csrot_usm_sycl(queue, n, x, incx, y,
                                                                           incy, c, s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zdrot_usm_sycl(queue, n, x, incx, y,
                                                                           incy, c, s, dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, float* a, float* b, float* c,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b, float* c,
                  float* s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_srotg_usm_sycl(queue, a, b, c, s,
                                                                           dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b, double* c,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_drotg_usm_sycl(queue, a, b, c, s,
                                                                           dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex<float>* a,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex<float>* a,
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_crotg_usm_sycl(queue, a, b, c, s,
                                                                           dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex<double>* a,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex<double>* a,
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zrotg_usm_sycl(queue, a, b, c, s,
                                                                           dependencies);
 }
 
-sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_srotm_usm_sycl(
         queue, n, x, incx, y, incy, param, dependencies);
 }
 
-sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_drotm_usm_sycl(
         queue, n, x, incx, y, incy, param, dependencies);
 }
 
-sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1,
+sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1,
                                                                            param, dependencies);
 }
 
-sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1, double* d2,
+sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1, double* d2,
                   double* x1, double y1, double* param,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1,
                                                                            param, dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sscal_usm_sycl(queue, n, alpha, x, incx,
                                                                           dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dscal_usm_sycl(queue, n, alpha, x, incx,
                                                                           dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_cscal_usm_sycl(queue, n, alpha, x, incx,
                                                                           dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_csscal_usm_sycl(queue, n, alpha, x, incx,
                                                                            dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zscal_usm_sycl(queue, n, alpha, x, incx,
                                                                           dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx,
                                                                            dependencies);
 }
 
-sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb,
+sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sdsdot_usm_sycl(
         queue, n, sb, x, incx, y, incy, result, dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sswap_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dswap_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_cswap_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zswap_usm_sycl(queue, n, x, incx, y,
                                                                           incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a,
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
@@ -2283,7 +2288,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a,
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
@@ -2291,7 +2296,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* x,
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
@@ -2300,7 +2305,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* x,
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
@@ -2309,7 +2314,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2317,7 +2322,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2325,7 +2330,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
@@ -2334,7 +2339,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
@@ -2343,7 +2348,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, float alpha, const float* a,
                        std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
@@ -2354,7 +2359,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, double alpha, const double* a,
                        std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
@@ -2365,7 +2370,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, std::complex<float> alpha,
                        const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
@@ -2377,7 +2382,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, std::complex<double> alpha,
                        const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
@@ -2389,7 +2394,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, float* alpha, const float** a,
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
@@ -2399,7 +2404,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, double* alpha, const double** a,
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
@@ -2409,7 +2414,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                        const std::complex<float>** a, std::int64_t* lda,
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
@@ -2420,7 +2425,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                        const std::complex<double>** a, std::int64_t* lda,
                        const std::complex<double>** x, std::int64_t* incx,
@@ -2432,7 +2437,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const float* a, std::int64_t lda,
                        std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec,
@@ -2442,7 +2447,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const double* a, std::int64_t lda,
                        std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec,
@@ -2452,7 +2457,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<float>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<float>* c,
@@ -2463,7 +2468,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<double>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<double>* c,
@@ -2474,7 +2479,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda,
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -2483,7 +2488,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda,
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -2492,7 +2497,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const std::complex<float>** a,
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
@@ -2501,7 +2506,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
@@ -2510,14 +2515,14 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sger_usm_sycl(
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -2525,7 +2530,7 @@ sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2533,7 +2538,7 @@ sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2541,7 +2546,7 @@ sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2549,7 +2554,7 @@ sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2557,7 +2562,7 @@ sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
@@ -2566,7 +2571,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
@@ -2575,7 +2580,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
@@ -2584,7 +2589,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
@@ -2593,7 +2598,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -2601,7 +2606,7 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -2609,7 +2614,7 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2617,7 +2622,7 @@ sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -2625,7 +2630,7 @@ sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
@@ -2634,7 +2639,7 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
@@ -2643,21 +2648,21 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_chpr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, dependencies);
 }
 
-sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zhpr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, dependencies);
 }
 
-sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
@@ -2665,7 +2670,7 @@ sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
@@ -2673,7 +2678,7 @@ sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2681,7 +2686,7 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2689,49 +2694,49 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sspmv_usm_sycl(
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dspmv_usm_sycl(
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sspr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, dependencies);
 }
 
-sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dspr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, dependencies);
 }
 
-sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sspr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dspr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2739,7 +2744,7 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -2747,28 +2752,28 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_ssyr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dsyr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_ssyr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
@@ -2776,21 +2781,21 @@ sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_stbmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtbmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2798,7 +2803,7 @@ sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2806,21 +2811,21 @@ sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_stbsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtbsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2828,7 +2833,7 @@ sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2836,21 +2841,21 @@ sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_stpmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtpmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2858,7 +2863,7 @@ sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2866,21 +2871,21 @@ sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_stpsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtpsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2888,7 +2893,7 @@ sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2896,21 +2901,21 @@ sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_strmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtrmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2918,7 +2923,7 @@ sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2926,21 +2931,21 @@ sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_strsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dtrsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2948,7 +2953,7 @@ sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -2956,42 +2961,44 @@ sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a,
-                 std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta,
+                 float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_sgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a,
-                 std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha,
+                 const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta,
+                 double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                 std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                 std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
+                 const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
+                 std::complex<float>* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_cgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                 std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                 std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
+                 const std::complex<double>* b, std::int64_t ldb, std::complex<double> beta,
+                 std::complex<double>* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_zgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                  const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -2999,23 +3006,25 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
-                 std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
+                 float beta, float* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a,
-                 std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const bfloat16* a, std::int64_t lda, const bfloat16* b, std::int64_t ldb,
+                 float beta, float* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
@@ -3024,7 +3033,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
@@ -3033,7 +3042,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3041,7 +3050,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3049,8 +3058,8 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
+sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
@@ -3058,8 +3067,8 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
+sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
@@ -3067,7 +3076,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3075,7 +3084,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3083,7 +3092,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
@@ -3092,7 +3101,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
@@ -3101,7 +3110,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3109,7 +3118,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -3117,7 +3126,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
@@ -3126,7 +3135,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
@@ -3135,7 +3144,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha,
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -3145,7 +3154,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha,
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -3155,7 +3164,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
@@ -3166,7 +3175,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
@@ -3177,7 +3186,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a,
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -3187,7 +3196,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, double alpha,
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -3197,7 +3206,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                        const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
@@ -3208,7 +3217,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                        const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
@@ -3219,24 +3228,24 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies) {
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a,
+                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
+                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_ssyr2k_usm_sycl(
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
-                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies) {
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a,
+                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
+                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dsyr2k_usm_sycl(
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3244,8 +3253,8 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3253,7 +3262,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -3262,7 +3271,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -3271,7 +3280,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
@@ -3281,7 +3290,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
@@ -3291,7 +3300,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -3300,7 +3309,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -3309,7 +3318,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
@@ -3319,7 +3328,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
@@ -3329,7 +3338,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
@@ -3339,7 +3348,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
@@ -3349,7 +3358,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
@@ -3360,7 +3369,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
@@ -3371,7 +3380,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
@@ -3381,7 +3390,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
@@ -3391,7 +3400,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, std::complex<float>* alpha, const std::complex<float>** a,
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
@@ -3402,7 +3411,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
@@ -3413,7 +3422,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const float** a, std::int64_t* lda, const float** b,
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
@@ -3424,7 +3433,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        double* alpha, const double** a, std::int64_t* lda, const double** b,
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
@@ -3435,7 +3444,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
@@ -3446,7 +3455,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** b, std::int64_t* ldb,
@@ -3458,7 +3467,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        sycl::half* alpha, const sycl::half** a, std::int64_t* lda,
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
@@ -3469,7 +3478,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b,
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
@@ -3480,7 +3489,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
@@ -3491,7 +3500,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
@@ -3502,7 +3511,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const float* a, std::int64_t lda, std::int64_t stride_a,
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -3513,7 +3522,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        double alpha, const double* a, std::int64_t lda, std::int64_t stride_a,
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
@@ -3524,7 +3533,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                        std::int64_t stride_a, const std::complex<float>* b, std::int64_t ldb,
@@ -3536,7 +3545,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                        std::int64_t stride_a, const std::complex<double>* b, std::int64_t ldb,
@@ -3548,7 +3557,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        sycl::half alpha, const sycl::half* a, std::int64_t lda,
                        std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
@@ -3560,7 +3569,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -3571,7 +3580,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -3582,7 +3591,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -3593,7 +3602,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha,
                   const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta,
                   float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3602,7 +3611,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha,
                   const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta,
                   double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3611,7 +3620,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k,
                   std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                   const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
@@ -3622,7 +3631,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k,
                   std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                   const std::complex<double>* b, std::int64_t ldb, std::complex<double> beta,
@@ -3633,7 +3642,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
@@ -3644,7 +3653,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
@@ -3655,7 +3664,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda,
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
@@ -3666,7 +3675,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda,
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
@@ -3677,7 +3686,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, float alpha, const float* a,
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
@@ -3686,7 +3695,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, double alpha, const double* a,
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
@@ -3695,7 +3704,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -3704,7 +3713,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -3713,7 +3722,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
@@ -3721,7 +3730,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -3729,7 +3738,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
@@ -3738,7 +3747,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
@@ -3747,7 +3756,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n, float alpha,
                           const float* a, std::int64_t lda, std::int64_t stride_a, float beta,
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
@@ -3758,7 +3767,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n, double alpha,
                           const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
@@ -3769,7 +3778,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                           std::int64_t stride_a, std::complex<float> beta,
@@ -3781,7 +3790,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<double> alpha, const std::complex<double>* a,
                           std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
@@ -3793,14 +3802,14 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                      float* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_somatcopy_usm_sycl(
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, double alpha, const double* a,
                      std::int64_t lda, double* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -3808,7 +3817,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                      const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
@@ -3816,7 +3825,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                      const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
@@ -3824,7 +3833,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
@@ -3832,7 +3841,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, double alpha, const double* a,
                       std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb,
                       std::int64_t strideb, const std::vector<sycl::event>& dependencies) {
@@ -3840,7 +3849,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, std::complex<float> alpha,
                       const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
                       std::complex<float>* b, std::int64_t ldb, std::int64_t strideb,
@@ -3849,7 +3858,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, std::complex<double> alpha,
                       const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
                       std::complex<double>* b, std::int64_t ldb, std::int64_t strideb,
@@ -3858,21 +3867,21 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_simatcopy_usm_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].column_major_dimatcopy_usm_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                      std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -3880,7 +3889,7 @@ sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                      std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -3888,7 +3897,7 @@ sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a,
                     std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3896,7 +3905,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a,
                     std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -3904,7 +3913,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
@@ -3913,7 +3922,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
@@ -3922,7 +3931,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, float* alpha, const float** a,
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -3931,7 +3940,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, double* alpha, const double** a,
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -3940,7 +3949,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
@@ -3949,7 +3958,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
@@ -3958,7 +3967,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
@@ -3966,7 +3975,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
@@ -3974,7 +3983,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -3983,7 +3992,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -3997,57 +4006,57 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
 namespace row_major {
 namespace detail {
 
-static oneapi::mkl::detail::table_initializer<domain::blas, blas_function_table_t> function_tables;
+static oneapi::math::detail::table_initializer<domain::blas, blas_function_table_t> function_tables;
 
 // Buffer APIs
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_scasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_dzasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result) {
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_sasum_sycl(queue, n, x, incx, result);
 }
 
-void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_dasum_sycl(queue, n, x, incx, result);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4055,7 +4064,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4063,7 +4072,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
@@ -4071,7 +4080,7 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& y,
                 std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) {
@@ -4079,58 +4088,59 @@ void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
            sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
            std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                              incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
            sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
            std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                              incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                              incy);
 }
 
-void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y,
                                                              incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_scopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_dcopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_ccopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_zcopy_sycl(queue, n, x, incx, y, incy);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4138,7 +4148,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4146,7 +4156,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4154,7 +4164,7 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, std::int64_t stridey,
                 std::int64_t batch_size) {
@@ -4162,247 +4172,249 @@ void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
         queue, n, x, incx, stridex, y, incy, stridey, batch_size);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_sdot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy,
-         sycl::buffer<double, 1>& result) {
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+         std::int64_t incy, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_ddot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
          sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
     function_tables[{ libkey, queue }].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
     function_tables[{ libkey, queue }].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& result) {
     function_tables[{ libkey, queue }].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& result) {
     function_tables[{ libkey, queue }].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_isamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_idamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_icamin_sycl(queue, n, x, incx, result);
 }
 
-void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_izamin_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_isamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_idamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_icamax_sycl(queue, n, x, incx, result);
 }
 
-void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
            sycl::buffer<std::int64_t, 1>& result) {
     function_tables[{ libkey, queue }].row_major_izamax_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_scnrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_dznrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& result) {
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_snrm2_sycl(queue, n, x, incx, result);
 }
 
-void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& result) {
     function_tables[{ libkey, queue }].row_major_dnrm2_sycl(queue, n, x, incx, result);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy, float c, float s) {
     function_tables[{ libkey, queue }].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
          sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy, double c, double s) {
     function_tables[{ libkey, queue }].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy, float c, float s) {
     function_tables[{ libkey, queue }].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double, 1>& x,
-         std::int64_t incx, sycl::buffer<double, 1>& y, std::int64_t incy, double c, double s) {
+void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+         sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
+         std::int64_t incy, double c, double s) {
     function_tables[{ libkey, queue }].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& a,
           sycl::buffer<float, 1>& b, sycl::buffer<float, 1>& c, sycl::buffer<float, 1>& s) {
     function_tables[{ libkey, queue }].row_major_srotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& a,
           sycl::buffer<double, 1>& b, sycl::buffer<double, 1>& c, sycl::buffer<double, 1>& s) {
     function_tables[{ libkey, queue }].row_major_drotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& b, sycl::buffer<float, 1>& c,
           sycl::buffer<std::complex<float>, 1>& s) {
     function_tables[{ libkey, queue }].row_major_crotg_sycl(queue, a, b, c, s);
 }
 
-void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
+void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& b, sycl::buffer<double, 1>& c,
           sycl::buffer<std::complex<double>, 1>& s) {
     function_tables[{ libkey, queue }].row_major_zrotg_sycl(queue, a, b, c, s);
 }
 
-void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
-          sycl::buffer<float, 1>& param) {
+void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy, sycl::buffer<float, 1>& param) {
     function_tables[{ libkey, queue }].row_major_srotm_sycl(queue, n, x, incx, y, incy, param);
 }
 
-void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& param) {
     function_tables[{ libkey, queue }].row_major_drotm_sycl(queue, n, x, incx, y, incy, param);
 }
 
-void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& d1,
+void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<float, 1>& d1,
            sycl::buffer<float, 1>& d2, sycl::buffer<float, 1>& x1, float y1,
            sycl::buffer<float, 1>& param) {
     function_tables[{ libkey, queue }].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param);
 }
 
-void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& d1,
+void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer<double, 1>& d1,
            sycl::buffer<double, 1>& d2, sycl::buffer<double, 1>& x1, double y1,
            sycl::buffer<double, 1>& param) {
     function_tables[{ libkey, queue }].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_sscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex<float> alpha,
-          sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_cscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_csscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_zscal_sycl(queue, n, alpha, x, incx);
 }
 
-void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_zdscal_sycl(queue, n, alpha, x, incx);
 }
 
-void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb,
+void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb,
             sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
             std::int64_t incy, sycl::buffer<float, 1>& result) {
     function_tables[{ libkey, queue }].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy,
                                                              result);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float, 1>& x,
-          std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy) {
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+          sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
+          std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_sswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_dswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_cswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_zswap_sycl(queue, n, x, incx, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
           sycl::buffer<float, 1>& y, std::int64_t incy) {
@@ -4410,7 +4422,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
@@ -4418,7 +4430,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
@@ -4427,7 +4439,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             lda, x, incx, beta, y, incy);
 }
 
-void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
@@ -4436,7 +4448,7 @@ void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             lda, x, incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
@@ -4444,7 +4456,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
@@ -4452,7 +4464,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -4460,7 +4472,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             incx, beta, y, incy);
 }
 
-void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
           std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -4468,7 +4480,7 @@ void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::
                                                             incx, beta, y, incy);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<float, 1>& x, std::int64_t incx,
                 std::int64_t stridex, float beta, sycl::buffer<float, 1>& y, std::int64_t incy,
@@ -4478,7 +4490,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<double, 1>& x, std::int64_t incx,
                 std::int64_t stridex, double beta, sycl::buffer<double, 1>& y, std::int64_t incy,
@@ -4488,7 +4500,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x,
                 std::int64_t incx, std::int64_t stridex, std::complex<float> beta,
@@ -4499,7 +4511,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                 std::int64_t n, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
@@ -4510,7 +4522,7 @@ void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
         batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<float, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -4519,7 +4531,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stridea,
                 sycl::buffer<double, 1>& x, std::int64_t incx, std::int64_t stridex,
                 sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -4528,7 +4540,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
@@ -4537,7 +4549,7 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
+void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m,
                 std::int64_t n, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                 std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                 std::int64_t stridex, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
@@ -4546,21 +4558,21 @@ void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size);
 }
 
-void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
          std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a,
                                                            lda);
 }
 
-void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
          std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a,
                                                            lda);
 }
 
-void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -4568,7 +4580,7 @@ void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                             lda);
 }
 
-void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -4576,7 +4588,7 @@ void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                             lda);
 }
 
-void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -4584,7 +4596,7 @@ void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                             lda);
 }
 
-void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -4592,7 +4604,7 @@ void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::i
                                                             lda);
 }
 
-void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -4600,7 +4612,7 @@ void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             x, incx, beta, y, incy);
 }
 
-void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -4608,7 +4620,7 @@ void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             x, incx, beta, y, incy);
 }
 
-void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -4616,7 +4628,7 @@ void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             incx, beta, y, incy);
 }
 
-void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -4624,21 +4636,21 @@ void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             incx, beta, y, incy);
 }
 
-void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a,
                                                            lda);
 }
 
-void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a,
                                                            lda);
 }
 
-void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda) {
@@ -4646,7 +4658,7 @@ void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             y, incy, a, lda);
 }
 
-void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda) {
@@ -4654,7 +4666,7 @@ void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             y, incy, a, lda);
 }
 
-void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy) {
@@ -4662,7 +4674,7 @@ void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             incx, beta, y, incy);
 }
 
-void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy) {
@@ -4670,21 +4682,21 @@ void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             incx, beta, y, incy);
 }
 
-void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<float>, 1>& a) {
     function_tables[{ libkey, queue }].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                            a);
 }
 
-void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
          sycl::buffer<std::complex<double>, 1>& a) {
     function_tables[{ libkey, queue }].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                            a);
 }
 
-void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<float>, 1>& a) {
@@ -4692,7 +4704,7 @@ void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             y, incy, a);
 }
 
-void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
           sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
           sycl::buffer<std::complex<double>, 1>& a) {
@@ -4700,7 +4712,7 @@ void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             y, incy, a);
 }
 
-void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
           std::int64_t incy) {
@@ -4708,7 +4720,7 @@ void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             x, incx, beta, y, incy);
 }
 
-void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx, double beta, sycl::buffer<double, 1>& y,
           std::int64_t incy) {
@@ -4716,257 +4728,257 @@ void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std:
                                                             x, incx, beta, y, incy);
 }
 
-void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x, std::int64_t incx,
           float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x,
                                                             incx, beta, y, incy);
 }
 
-void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x, std::int64_t incx,
           double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x,
                                                             incx, beta, y, incy);
 }
 
-void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a) {
     function_tables[{ libkey, queue }].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                            a);
 }
 
-void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a) {
     function_tables[{ libkey, queue }].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx,
                                                            a);
 }
 
-void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a) {
     function_tables[{ libkey, queue }].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx,
                                                             y, incy, a);
 }
 
-void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a) {
     function_tables[{ libkey, queue }].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx,
                                                             y, incy, a);
 }
 
-void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& x,
           std::int64_t incx, float beta, sycl::buffer<float, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x,
                                                             incx, beta, y, incy);
 }
 
-void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& x,
           std::int64_t incx, double beta, sycl::buffer<double, 1>& y, std::int64_t incy) {
     function_tables[{ libkey, queue }].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x,
                                                             incx, beta, y, incy);
 }
 
-void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& a,
          std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a,
                                                            lda);
 }
 
-void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
          double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& a,
          std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a,
                                                            lda);
 }
 
-void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           float alpha, sycl::buffer<float, 1>& x, std::int64_t incx, sycl::buffer<float, 1>& y,
           std::int64_t incy, sycl::buffer<float, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx,
                                                             y, incy, a, lda);
 }
 
-void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
           double alpha, sycl::buffer<double, 1>& x, std::int64_t incx, sycl::buffer<double, 1>& y,
           std::int64_t incy, sycl::buffer<double, 1>& a, std::int64_t lda) {
     function_tables[{ libkey, queue }].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx,
                                                             y, incy, a, lda);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             k, a, lda, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, sycl::buffer<double, 1>& x,
           std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
           sycl::buffer<double, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           diag unit_diag, std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx) {
     function_tables[{ libkey, queue }].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n,
                                                             a, lda, x, incx);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
           std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -4974,7 +4986,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                             a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -4982,7 +4994,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                             a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -4991,7 +5003,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                             a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -5000,7 +5012,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                             a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc) {
@@ -5008,7 +5020,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
                                                             a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
           sycl::buffer<sycl::half, 1>& a, std::int64_t lda, sycl::buffer<sycl::half, 1>& b,
           std::int64_t ldb, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -5016,7 +5028,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
           std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<bfloat16, 1>& a,
           std::int64_t lda, sycl::buffer<bfloat16, 1>& b, std::int64_t ldb, float beta,
           sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -5024,7 +5036,7 @@ void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, tran
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -5033,7 +5045,7 @@ void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -5042,14 +5054,14 @@ void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<std::complex<float>, 1>& a,
           std::int64_t lda, float beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha,
                                                             a, lda, beta, c, ldc);
 }
 
-void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<std::complex<double>, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<std::complex<double>, 1>& c,
           std::int64_t ldc) {
@@ -5057,7 +5069,7 @@ void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                             a, lda, beta, c, ldc);
 }
 
-void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, float beta,
@@ -5066,7 +5078,7 @@ void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, double beta,
@@ -5075,7 +5087,7 @@ void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
           std::int64_t ldc) {
@@ -5083,7 +5095,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
           sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -5091,7 +5103,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -5100,7 +5112,7 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           std::int64_t m, std::int64_t n, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -5109,21 +5121,21 @@ void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha,
                                                             a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
           std::int64_t lda, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
     function_tables[{ libkey, queue }].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha,
                                                             a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<float> alpha,
           sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
           sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc) {
@@ -5131,7 +5143,7 @@ void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                             a, lda, beta, c, ldc);
 }
 
-void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
           std::int64_t n, std::int64_t k, std::complex<double> alpha,
           sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
           sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc) {
@@ -5139,7 +5151,7 @@ void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tran
                                                             a, lda, beta, c, ldc);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
@@ -5148,7 +5160,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c,
                 std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) {
@@ -5157,7 +5169,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
@@ -5167,7 +5179,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                 std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c,
@@ -5177,7 +5189,7 @@ void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         batch_size);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
            sycl::buffer<float, 1>& b, std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
            std::int64_t ldc) {
@@ -5185,7 +5197,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
            std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
            sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -5193,7 +5205,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -5202,7 +5214,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
            std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -5211,7 +5223,7 @@ void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
                                                              a, lda, b, ldb, beta, c, ldc);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
@@ -5219,7 +5231,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
@@ -5227,7 +5239,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
@@ -5235,7 +5247,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
@@ -5243,7 +5255,7 @@ void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
           sycl::buffer<float, 1>& a, std::int64_t lda, sycl::buffer<float, 1>& b,
           std::int64_t ldb) {
@@ -5251,7 +5263,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
           sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
           std::int64_t ldb) {
@@ -5259,7 +5271,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
@@ -5267,7 +5279,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
           transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
           std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
           sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
@@ -5275,7 +5287,7 @@ void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo
                                                             unit_diag, m, n, alpha, a, lda, b, ldb);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -5286,7 +5298,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, double alpha,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b, double beta,
@@ -5297,7 +5309,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                 sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5308,7 +5320,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                 sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5319,7 +5331,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5330,7 +5342,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -5341,7 +5353,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5352,7 +5364,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                 sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5363,7 +5375,7 @@ void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa
         stride_c, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                 sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5373,7 +5385,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                 sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
                 sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5383,7 +5395,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                 std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
@@ -5393,7 +5405,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                 transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                 std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
@@ -5403,7 +5415,7 @@ void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
         stride_b, batch_size);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
            std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
            sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -5411,7 +5423,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, double alpha,
            sycl::buffer<double, 1>& a, std::int64_t lda, sycl::buffer<double, 1>& b,
            std::int64_t ldb, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -5419,7 +5431,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
            sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::complex<float> beta,
@@ -5428,7 +5440,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
+void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa,
            transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
            sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::complex<double> beta,
@@ -5437,7 +5449,7 @@ void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, tra
         queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
                std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
@@ -5446,7 +5458,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
                std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c,
@@ -5455,7 +5467,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<int8_t, 1>& b, std::int64_t ldb, int8_t bo, float beta,
@@ -5464,7 +5476,7 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
                offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
                sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao,
                sycl::buffer<uint8_t, 1>& b, std::int64_t ldb, uint8_t bo, float beta,
@@ -5473,24 +5485,24 @@ void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
         queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
-                    std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size) {
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
+                    std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b,
+                    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
-                    std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
-                    std::int64_t stride_b, std::int64_t batch_size) {
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
+                    std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b,
+                    std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<float> alpha,
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
@@ -5498,8 +5510,8 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<double> alpha,
+void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                     std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b,
                     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
@@ -5507,37 +5519,39 @@ void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
-                    std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& ab,
+                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                    std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
-                    std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) {
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& ab,
+                    std::int64_t lda, std::int64_t ldb, std::int64_t stride,
+                    std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<float> alpha,
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
-                    std::int64_t n, std::complex<double> alpha,
+void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
+                    std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda, std::int64_t ldb,
                     std::int64_t stride, std::int64_t batch_size) {
     function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, float alpha,
                    sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a, float beta,
                    sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5548,7 +5562,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, double alpha,
                    sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a, double beta,
                    sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -5559,7 +5573,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                    sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
                    std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b,
@@ -5570,7 +5584,7 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                    transpose transb, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                    sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                    std::int64_t stride_a, std::complex<double> beta,
@@ -5582,35 +5596,35 @@ void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose tra
         stride_c, batch_size);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
               sycl::buffer<float, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda,
                                                                 b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
               sycl::buffer<double, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda,
                                                                 b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda,
                                                                 b, ldb);
 }
 
-void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
               std::int64_t lda, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda,
                                                                 b, ldb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<float, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
@@ -5618,7 +5632,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                  stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                std::int64_t stridea, sycl::buffer<double, 1>& b, std::int64_t ldb,
                std::int64_t strideb) {
@@ -5626,7 +5640,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                  stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
@@ -5634,7 +5648,7 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                  stridea, b, ldb, strideb);
 }
 
-void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a,
                std::int64_t lda, std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& b,
                std::int64_t ldb, std::int64_t strideb) {
@@ -5642,35 +5656,35 @@ void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
                                                                  stridea, b, ldb, strideb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, float alpha, sycl::buffer<float, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda,
                                                                 ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, double alpha, sycl::buffer<double, 1>& ab, std::int64_t lda,
               std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda,
                                                                 ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda,
                                                                 ldb);
 }
 
-void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
               std::int64_t n, std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& ab,
               std::int64_t lda, std::int64_t ldb) {
     function_tables[{ libkey, queue }].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda,
                                                                 ldb);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
              std::int64_t lda, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
              sycl::buffer<float, 1>& c, std::int64_t ldc) {
@@ -5678,7 +5692,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
                                                                a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
              std::int64_t lda, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
              sycl::buffer<double, 1>& c, std::int64_t ldc) {
@@ -5686,7 +5700,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
                                                                a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, std::complex<float> alpha,
              sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::complex<float> beta,
              sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
@@ -5695,7 +5709,7 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
                                                                a, lda, beta, b, ldb, c, ldc);
 }
 
-void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
+void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb,
              std::int64_t m, std::int64_t n, std::complex<double> alpha,
              sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::complex<double> beta,
              sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
@@ -5706,47 +5720,47 @@ void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, t
 
 // USM APIs
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_scasum_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dzasum_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sasum_usm_sycl(queue, n, x, incx, result,
                                                                        dependencies);
 }
 
-sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dasum_usm_sycl(queue, n, x, incx, result,
                                                                        dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  const float* x, std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y,
                                                                        incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  const double* x, std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y,
                                                                        incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -5754,7 +5768,7 @@ sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        incy, dependencies);
 }
 
-sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double>* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -5762,7 +5776,7 @@ sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        incy, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        float* alpha, const float** x, std::int64_t* incx, float** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5770,7 +5784,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        double* alpha, const double** x, std::int64_t* incx, double** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5778,7 +5792,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        std::complex<float>* alpha, const std::complex<float>** x,
                        std::int64_t* incx, std::complex<float>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -5787,7 +5801,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        std::complex<double>* alpha, const std::complex<double>** x,
                        std::int64_t* incx, std::complex<double>** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -5796,7 +5810,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5804,15 +5818,15 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
-                       const double* x, std::int64_t incx, std::int64_t stridex, double* y,
-                       std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
+                       double alpha, const double* x, std::int64_t incx, std::int64_t stridex,
+                       double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_usm_sycl(
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<float>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
@@ -5821,7 +5835,7 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                        std::int64_t stridex, std::complex<double>* y, std::int64_t incy,
                        std::int64_t stridey, std::int64_t batch_size,
@@ -5830,21 +5844,21 @@ sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                   const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_saxpby_usm_sycl(
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                   const double* x, std::int64_t incx, const double beta, double* y,
                   std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_daxpby_usm_sycl(
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                   const std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
@@ -5852,7 +5866,7 @@ sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                   const std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
                   const std::vector<sycl::event>& dependencies) {
@@ -5860,35 +5874,35 @@ sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n
         queue, n, alpha, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const float** x, std::int64_t* incx, float** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5896,7 +5910,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const double** x, std::int64_t* incx, double** y, std::int64_t* incy,
                        std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5904,7 +5918,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5912,7 +5926,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                        const std::complex<double>** x, std::int64_t* incx, std::complex<double>** y,
                        std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5920,7 +5934,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, y, incy, group_count, group_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const float* x, std::int64_t incx, std::int64_t stridex, float* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5928,7 +5942,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const double* x, std::int64_t incx, std::int64_t stridex, double* y,
                        std::int64_t incy, std::int64_t stridey, std::int64_t batch_size,
                        const std::vector<sycl::event>& dependencies) {
@@ -5936,7 +5950,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -5944,7 +5958,7 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
                        std::complex<double>* y, std::int64_t incy, std::int64_t stridey,
                        std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -5952,28 +5966,28 @@ sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int6
         queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, float* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy,
                                                                       result, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                 std::int64_t incx, const double* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy,
                                                                       result, dependencies);
 }
 
-sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                 std::int64_t incx, const float* y, std::int64_t incy, double* result,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy,
                                                                        result, dependencies);
 }
 
-sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -5981,7 +5995,7 @@ sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        result, dependencies);
 }
 
-sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -5989,7 +6003,7 @@ sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        result, dependencies);
 }
 
-sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, const std::complex<float>* y,
                  std::int64_t incy, std::complex<float>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -5997,7 +6011,7 @@ sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        result, dependencies);
 }
 
-sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, const std::complex<double>* y,
                  std::int64_t incy, std::complex<double>* result,
                  const std::vector<sycl::event>& dependencies) {
@@ -6005,96 +6019,96 @@ sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                        result, dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_isamin_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_idamin_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_icamin_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_izamin_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_isamax_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                   std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_idamax_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<float>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_icamax_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   const std::complex<double>* x, std::int64_t incx, std::int64_t* result,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_izamax_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<float>* x, std::int64_t incx, float* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_scnrm2_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  const std::complex<double>* x, std::int64_t incx, double* result,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dznrm2_usm_sycl(queue, n, x, incx, result,
                                                                         dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x,
                  std::int64_t incx, float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_snrm2_usm_sycl(queue, n, x, incx, result,
                                                                        dependencies);
 }
 
-sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
+sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x,
                  std::int64_t incx, double* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dnrm2_usm_sycl(queue, n, x, incx, result,
                                                                        dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                 std::int64_t incy, float c, float s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c,
                                                                       s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                 std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                 std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
@@ -6102,149 +6116,149 @@ sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
                                                                       s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                 std::int64_t incx, float* y, std::int64_t incy, float c, float s,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy,
                                                                        c, s, dependencies);
 }
 
-sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                 std::int64_t incx, double* y, std::int64_t incy, double c, double s,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy,
                                                                        c, s, dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, float* a, float* b, float* c,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b, float* c,
                  float* s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_srotg_usm_sycl(queue, a, b, c, s,
                                                                        dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b, double* c,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a, double* b, double* c,
                  double* s, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_drotg_usm_sycl(queue, a, b, c, s,
                                                                        dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex<float>* a,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex<float>* a,
                  std::complex<float>* b, float* c, std::complex<float>* s,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_crotg_usm_sycl(queue, a, b, c, s,
                                                                        dependencies);
 }
 
-sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex<double>* a,
+sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex<double>* a,
                  std::complex<double>* b, double* c, std::complex<double>* s,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zrotg_usm_sycl(queue, a, b, c, s,
                                                                        dependencies);
 }
 
-sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy, float* param,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy,
                                                                        param, dependencies);
 }
 
-sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy, double* param,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy,
                                                                        param, dependencies);
 }
 
-sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1,
+sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1,
                   float y1, float* param, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1,
                                                                         param, dependencies);
 }
 
-sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1, double* d2,
+sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1, double* d2,
                   double* x1, double y1, double* param,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1,
                                                                         param, dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sscal_usm_sycl(queue, n, alpha, x, incx,
                                                                        dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dscal_usm_sycl(queue, n, alpha, x, incx,
                                                                        dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float> alpha, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_cscal_usm_sycl(queue, n, alpha, x, incx,
                                                                        dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double> alpha, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_csscal_usm_sycl(queue, n, alpha, x, incx,
                                                                         dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zscal_usm_sycl(queue, n, alpha, x, incx,
                                                                        dependencies);
 }
 
-sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
+sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx,
                                                                         dependencies);
 }
 
-sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb,
+sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb,
                    const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                    float* result, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y,
                                                                         incy, result, dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x,
                  std::int64_t incx, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x,
                  std::int64_t incx, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<float>* x, std::int64_t incx, std::complex<float>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  std::complex<double>* x, std::int64_t incx, std::complex<double>* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy,
                                                                        dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a,
                  std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
@@ -6252,7 +6266,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a,
                  std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y,
                  std::int64_t incy, const std::vector<sycl::event>& dependencies) {
@@ -6260,7 +6274,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* x,
                  std::int64_t incx, std::complex<float> beta, std::complex<float>* y,
@@ -6269,7 +6283,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* x,
                  std::int64_t incx, std::complex<double> beta, std::complex<double>* y,
@@ -6278,7 +6292,7 @@ sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6286,7 +6300,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6294,7 +6308,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
@@ -6303,7 +6317,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
+sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m,
                  std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
@@ -6312,7 +6326,7 @@ sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, float alpha, const float* a,
                        std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float beta, float* y, std::int64_t incy,
@@ -6323,7 +6337,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, double alpha, const double* a,
                        std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double beta, double* y, std::int64_t incy,
@@ -6334,7 +6348,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, std::complex<float> alpha,
                        const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
                        const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
@@ -6346,7 +6360,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                        std::int64_t m, std::int64_t n, std::complex<double> alpha,
                        const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
                        const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
@@ -6358,7 +6372,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         batch_size, dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, float* alpha, const float** a,
                        std::int64_t* lda, const float** x, std::int64_t* incx, float* beta,
                        float** y, std::int64_t* incy, std::int64_t group_count,
@@ -6368,7 +6382,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, double* alpha, const double** a,
                        std::int64_t* lda, const double** x, std::int64_t* incx, double* beta,
                        double** y, std::int64_t* incy, std::int64_t group_count,
@@ -6378,7 +6392,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                        const std::complex<float>** a, std::int64_t* lda,
                        const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
@@ -6389,7 +6403,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                        std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                        const std::complex<double>** a, std::int64_t* lda,
                        const std::complex<double>** x, std::int64_t* incx,
@@ -6401,7 +6415,7 @@ sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const float* a, std::int64_t lda,
                        std::int64_t stridea, const float* x, std::int64_t incx,
                        std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec,
@@ -6411,7 +6425,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const double* a, std::int64_t lda,
                        std::int64_t stridea, const double* x, std::int64_t incx,
                        std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec,
@@ -6421,7 +6435,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<float>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<float>* c,
@@ -6432,7 +6446,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        std::int64_t m, std::int64_t n, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stridea, const std::complex<double>* x,
                        std::int64_t incx, std::int64_t stridex, std::complex<double>* c,
@@ -6443,7 +6457,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda,
                        const float** x, std::int64_t* incx, float** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -6452,7 +6466,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda,
                        const double** x, std::int64_t* incx, double** c, std::int64_t* ldc,
                        std::int64_t group_count, std::int64_t* group_size,
@@ -6461,7 +6475,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const std::complex<float>** a,
                        std::int64_t* lda, const std::complex<float>** x, std::int64_t* incx,
                        std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
@@ -6470,7 +6484,7 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        std::int64_t* m, std::int64_t* n, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** x, std::int64_t* incx,
                        std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
@@ -6479,14 +6493,14 @@ sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies);
 }
 
-sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                 float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sger_usm_sycl(
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, const double* y,
                 std::int64_t incy, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -6494,7 +6508,7 @@ sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6502,7 +6516,7 @@ sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6510,7 +6524,7 @@ sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6518,7 +6532,7 @@ sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6526,7 +6540,7 @@ sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
         queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
                  std::int64_t lda, const std::complex<float>* x, std::int64_t incx,
                  std::complex<float> beta, std::complex<float>* y, std::int64_t incy,
@@ -6535,7 +6549,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
                  std::int64_t lda, const std::complex<double>* x, std::int64_t incx,
                  std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
@@ -6544,7 +6558,7 @@ sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
@@ -6553,7 +6567,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
@@ -6562,7 +6576,7 @@ sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -6570,7 +6584,7 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
@@ -6578,7 +6592,7 @@ sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6586,7 +6600,7 @@ sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  std::int64_t lda, const std::vector<sycl::event>& dependencies) {
@@ -6594,7 +6608,7 @@ sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a,
                  const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
                  std::complex<float>* y, std::int64_t incy,
@@ -6603,7 +6617,7 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a,
                  const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
                  std::complex<double>* y, std::int64_t incy,
@@ -6612,21 +6626,21 @@ sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const std::complex<float>* x, std::int64_t incx,
                 std::complex<float>* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha,
                                                                       x, incx, a, dependencies);
 }
 
-sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const std::complex<double>* x, std::int64_t incx,
                 std::complex<double>* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha,
                                                                       x, incx, a, dependencies);
 }
 
-sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* x, std::int64_t incx,
                  const std::complex<float>* y, std::int64_t incy, std::complex<float>* a,
                  const std::vector<sycl::event>& dependencies) {
@@ -6634,7 +6648,7 @@ sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* x, std::int64_t incx,
                  const std::complex<double>* y, std::int64_t incy, std::complex<double>* a,
                  const std::vector<sycl::event>& dependencies) {
@@ -6642,7 +6656,7 @@ sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x,
                  std::int64_t incx, float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6650,7 +6664,7 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6658,49 +6672,49 @@ sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, const float* x, std::int64_t incx, float beta,
                  float* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sspmv_usm_sycl(
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, const double* x, std::int64_t incx, double beta,
                  double* y, std::int64_t incy, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dspmv_usm_sycl(
         queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha,
                                                                       x, incx, a, dependencies);
 }
 
-sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha,
                                                                       x, incx, a, dependencies);
 }
 
-sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sspr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dspr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies);
 }
 
-sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx,
                  float beta, float* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6708,7 +6722,7 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* a, std::int64_t lda, const double* x,
                  std::int64_t incx, double beta, double* y, std::int64_t incy,
                  const std::vector<sycl::event>& dependencies) {
@@ -6716,28 +6730,28 @@ sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies);
 }
 
-sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_ssyr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                 double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda,
                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dsyr_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, a, lda, dependencies);
 }
 
-sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy,
                  float* a, std::int64_t lda, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_ssyr2_usm_sycl(
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
+sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n,
                  double alpha, const double* x, std::int64_t incx, const double* y,
                  std::int64_t incy, double* a, std::int64_t lda,
                  const std::vector<sycl::event>& dependencies) {
@@ -6745,21 +6759,21 @@ sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_stbmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtbmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6767,7 +6781,7 @@ sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6775,21 +6789,21 @@ sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
                  float* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_stbsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
                  double* x, std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtbsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<float>* a,
                  std::int64_t lda, std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6797,7 +6811,7 @@ sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, std::int64_t k, const std::complex<double>* a,
                  std::int64_t lda, std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6805,21 +6819,21 @@ sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_stpmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtpmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6827,7 +6841,7 @@ sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6835,21 +6849,21 @@ sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_stpsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtpsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6857,7 +6871,7 @@ sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6865,21 +6879,21 @@ sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_strmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtrmv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6887,7 +6901,7 @@ sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6895,21 +6909,21 @@ sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_strsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x,
                  std::int64_t incx, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dtrsv_usm_sycl(
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6917,7 +6931,7 @@ sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  diag unit_diag, std::int64_t n, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* x, std::int64_t incx,
                  const std::vector<sycl::event>& dependencies) {
@@ -6925,42 +6939,44 @@ sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a,
-                 std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta,
+                 float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_sgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a,
-                 std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha,
+                 const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta,
+                 double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
-                 const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
-                 std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                 std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
+                 const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
+                 std::complex<float>* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_cgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
-                 const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
-                 std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
+                 std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
+                 const std::complex<double>* b, std::int64_t ldb, std::complex<double> beta,
+                 std::complex<double>* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_zgemm_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
                  const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
                  sycl::half beta, sycl::half* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -6968,23 +6984,25 @@ sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
-                 std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb,
+                 float beta, float* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb,
-                 std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a,
-                 std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c,
-                 std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
+sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
+                 transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+                 const bfloat16* a, std::int64_t lda, const bfloat16* b, std::int64_t ldb,
+                 float beta, float* c, std::int64_t ldc,
+                 const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_usm_sycl(
         queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
@@ -6993,7 +7011,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
@@ -7002,7 +7020,7 @@ sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, float alpha, const std::complex<float>* a,
                  std::int64_t lda, float beta, std::complex<float>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7010,7 +7028,7 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, double alpha, const std::complex<double>* a,
                  std::int64_t lda, double beta, std::complex<double>* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7018,8 +7036,8 @@ sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
+sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, float beta, std::complex<float>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
@@ -7027,8 +7045,8 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
+sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, double beta, std::complex<double>* c, std::int64_t ldc,
                   const std::vector<sycl::event>& dependencies) {
@@ -7036,7 +7054,7 @@ sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7044,7 +7062,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7052,7 +7070,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                  std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
@@ -7061,7 +7079,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  std::int64_t m, std::int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                  std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
@@ -7070,7 +7088,7 @@ sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
                  float beta, float* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7078,7 +7096,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
                  double beta, double* c, std::int64_t ldc,
                  const std::vector<sycl::event>& dependencies) {
@@ -7086,7 +7104,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
                  const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                  std::complex<float>* c, std::int64_t ldc,
@@ -7095,7 +7113,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
+sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
                  const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                  std::complex<double>* c, std::int64_t ldc,
@@ -7104,7 +7122,7 @@ sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lowe
         queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha,
                        const float** a, std::int64_t* lda, float* beta, float** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -7114,7 +7132,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha,
                        const double** a, std::int64_t* lda, double* beta, double** c,
                        std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
@@ -7124,7 +7142,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
@@ -7135,7 +7153,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower,
                        transpose* trans, std::int64_t* n, std::int64_t* k,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>* beta, std::complex<double>** c,
@@ -7146,7 +7164,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upp
         dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a,
                        std::int64_t lda, std::int64_t stride_a, float beta, float* c,
                        std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -7156,7 +7174,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, double alpha,
                        const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                        double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
@@ -7166,7 +7184,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                        const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                        std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
@@ -7177,7 +7195,7 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                        transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                        const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                        std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
@@ -7188,24 +7206,24 @@ sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo uppe
         batch_size, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
-                  const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies) {
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a,
+                  std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c,
+                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_ssyr2k_usm_sycl(
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
-                  const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc,
-                  const std::vector<sycl::event>& dependencies) {
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a,
+                  std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c,
+                  std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dsyr2k_usm_sycl(
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<float> alpha,
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<float> alpha,
                   const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
                   std::int64_t ldb, std::complex<float> beta, std::complex<float>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7213,8 +7231,8 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans,
-                  std::int64_t n, std::int64_t k, std::complex<double> alpha,
+sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
+                  transpose trans, std::int64_t n, std::int64_t k, std::complex<double> alpha,
                   const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
                   std::int64_t ldb, std::complex<double> beta, std::complex<double>* c,
                   std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7222,7 +7240,7 @@ sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -7231,7 +7249,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -7240,7 +7258,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
@@ -7250,7 +7268,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
@@ -7260,7 +7278,7 @@ sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha,
                  const float* a, std::int64_t lda, float* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -7269,7 +7287,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha,
                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
                  const std::vector<sycl::event>& dependencies) {
@@ -7278,7 +7296,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                  std::complex<float>* b, std::int64_t ldb,
@@ -7288,7 +7306,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
+sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower,
                  transpose trans, diag unit_diag, std::int64_t m, std::int64_t n,
                  std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                  std::complex<double>* b, std::int64_t ldb,
@@ -7298,7 +7316,7 @@ sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right
         dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, float alpha, const float* a, std::int64_t lda,
                        std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
@@ -7308,7 +7326,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, double alpha, const double* a, std::int64_t lda,
                        std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
@@ -7318,7 +7336,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* b,
@@ -7329,7 +7347,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right,
                        uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m,
                        std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
@@ -7340,7 +7358,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left
         stride_b, batch_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
@@ -7350,7 +7368,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, double* alpha, const double** a, std::int64_t* lda,
                        double** b, std::int64_t* ldb, std::int64_t group_count,
@@ -7360,7 +7378,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, std::complex<float>* alpha, const std::complex<float>** a,
                        std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
@@ -7371,7 +7389,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right,
+sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right,
                        uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m,
                        std::int64_t* n, std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
@@ -7382,7 +7400,7 @@ sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* lef
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const float** a, std::int64_t* lda, const float** b,
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
@@ -7393,7 +7411,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        double* alpha, const double** a, std::int64_t* lda, const double** b,
                        std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc,
@@ -7404,7 +7422,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
                        const std::complex<float>** b, std::int64_t* ldb, std::complex<float>* beta,
@@ -7415,7 +7433,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        std::complex<double>* alpha, const std::complex<double>** a,
                        std::int64_t* lda, const std::complex<double>** b, std::int64_t* ldb,
@@ -7427,7 +7445,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        sycl::half* alpha, const sycl::half** a, std::int64_t* lda,
                        const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c,
@@ -7438,7 +7456,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b,
                        std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc,
@@ -7449,7 +7467,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
@@ -7460,7 +7478,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa,
                        transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                        float* alpha, const std::int8_t** a, std::int64_t* lda,
                        const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
@@ -7471,7 +7489,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         group_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const float* a, std::int64_t lda, std::int64_t stride_a,
                        const float* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -7482,7 +7500,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        double alpha, const double* a, std::int64_t lda, std::int64_t stride_a,
                        const double* b, std::int64_t ldb, std::int64_t stride_b, double beta,
@@ -7493,7 +7511,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                        std::int64_t stride_a, const std::complex<float>* b, std::int64_t ldb,
@@ -7505,7 +7523,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                        std::int64_t stride_a, const std::complex<double>* b, std::int64_t ldb,
@@ -7517,7 +7535,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        sycl::half alpha, const sycl::half* a, std::int64_t lda,
                        std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
@@ -7529,7 +7547,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a,
                        const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -7540,7 +7558,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -7551,7 +7569,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                        transpose transb, std::int64_t m, std::int64_t n, std::int64_t k,
                        float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a,
                        const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta,
@@ -7562,7 +7580,7 @@ sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         stride_c, batch_size, dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha,
                   const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta,
                   float* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7571,7 +7589,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha,
                   const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta,
                   double* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7580,7 +7598,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k,
                   std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                   const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
@@ -7591,7 +7609,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower,
+sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower,
                   transpose transa, transpose transb, std::int64_t n, std::int64_t k,
                   std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
                   const std::complex<double>* b, std::int64_t ldb, std::complex<double> beta,
@@ -7602,7 +7620,7 @@ sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_low
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                       std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
@@ -7613,7 +7631,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda,
                       std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
@@ -7624,7 +7642,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda,
                       std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo,
@@ -7635,7 +7653,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                       transpose transb, offset offsetc, std::int64_t m, std::int64_t n,
                       std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda,
                       std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo,
@@ -7646,7 +7664,7 @@ sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, float alpha, const float* a,
                            std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
@@ -7655,7 +7673,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, double alpha, const double* a,
                            std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb,
                            std::int64_t stride_b, std::int64_t batch_size,
@@ -7664,7 +7682,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
                            const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -7673,7 +7691,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
                            const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                            std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -7682,7 +7700,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                            std::int64_t ldb, std::int64_t stride, std::int64_t batch_size,
                            const std::vector<sycl::event>& dependencies) {
@@ -7690,7 +7708,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, double alpha, double* ab,
                            std::int64_t lda, std::int64_t ldb, std::int64_t stride,
                            std::int64_t batch_size, const std::vector<sycl::event>& dependencies) {
@@ -7698,7 +7716,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<float> alpha,
                            std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
@@ -7707,7 +7725,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                            std::int64_t m, std::int64_t n, std::complex<double> alpha,
                            std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                            std::int64_t stride, std::int64_t batch_size,
@@ -7716,7 +7734,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n, float alpha,
                           const float* a, std::int64_t lda, std::int64_t stride_a, float beta,
                           const float* b, std::int64_t ldb, std::int64_t stride_b, float* c,
@@ -7727,7 +7745,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n, double alpha,
                           const double* a, std::int64_t lda, std::int64_t stride_a, double beta,
                           const double* b, std::int64_t ldb, std::int64_t stride_b, double* c,
@@ -7738,7 +7756,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
                           std::int64_t stride_a, std::complex<float> beta,
@@ -7750,7 +7768,7 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                           transpose transb, std::int64_t m, std::int64_t n,
                           std::complex<double> alpha, const std::complex<double>* a,
                           std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
@@ -7762,14 +7780,14 @@ sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transp
         stride_c, batch_size, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                      float* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_somatcopy_usm_sycl(
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, double alpha, const double* a,
                      std::int64_t lda, double* b, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -7777,7 +7795,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                      const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
@@ -7785,7 +7803,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                      const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
@@ -7793,7 +7811,7 @@ sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, a, lda, b, ldb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
                       std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb,
                       const std::vector<sycl::event>& dependencies) {
@@ -7801,7 +7819,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, double alpha, const double* a,
                       std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb,
                       std::int64_t strideb, const std::vector<sycl::event>& dependencies) {
@@ -7809,7 +7827,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, std::complex<float> alpha,
                       const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
                       std::complex<float>* b, std::int64_t ldb, std::int64_t strideb,
@@ -7818,7 +7836,7 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                       std::int64_t m, std::int64_t n, std::complex<double> alpha,
                       const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
                       std::complex<double>* b, std::int64_t ldb, std::int64_t strideb,
@@ -7827,21 +7845,21 @@ sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose
         queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_simatcopy_usm_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda,
                      std::int64_t ldb, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].row_major_dimatcopy_usm_sycl(
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                      std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -7849,7 +7867,7 @@ sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans,
+sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans,
                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                      std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
                      const std::vector<sycl::event>& dependencies) {
@@ -7857,7 +7875,7 @@ sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose t
         queue, trans, m, n, alpha, ab, lda, ldb, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a,
                     std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7865,7 +7883,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a,
                     std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c,
                     std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
@@ -7873,7 +7891,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, std::complex<float> alpha,
                     const std::complex<float>* a, std::int64_t lda, std::complex<float> beta,
                     const std::complex<float>* b, std::int64_t ldb, std::complex<float>* c,
@@ -7882,7 +7900,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa,
+sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa,
                     transpose transb, std::int64_t m, std::int64_t n, std::complex<double> alpha,
                     const std::complex<double>* a, std::int64_t lda, std::complex<double> beta,
                     const std::complex<double>* b, std::int64_t ldb, std::complex<double>* c,
@@ -7891,7 +7909,7 @@ sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose tr
         queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, float* alpha, const float** a,
                            std::int64_t* lda, float** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -7900,7 +7918,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, double* alpha, const double** a,
                            std::int64_t* lda, double** b, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -7909,7 +7927,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                            const std::complex<float>** a, std::int64_t* lda,
                            std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
@@ -7918,7 +7936,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                            const std::complex<double>** a, std::int64_t* lda,
                            std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
@@ -7927,7 +7945,7 @@ sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, float* alpha, float** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
@@ -7935,7 +7953,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, double* alpha, double** ab,
                            std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
                            std::int64_t* groupsize, const std::vector<sycl::event>& dependencies) {
@@ -7943,7 +7961,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<float>* alpha,
                            std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -7952,7 +7970,7 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
         queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies);
 }
 
-sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans,
+sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans,
                            std::int64_t* m, std::int64_t* n, std::complex<double>* alpha,
                            std::complex<double>** ab, std::int64_t* lda, std::int64_t* ldb,
                            std::int64_t group_count, std::int64_t* groupsize,
@@ -7964,5 +7982,5 @@ sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, trans
 } //namespace detail
 } //namespace row_major
 } //namespace blas
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/src/blas/function_table.hpp b/src/blas/function_table.hpp
index c821a4a51..cd07f9dca 100644
--- a/src/blas/function_table.hpp
+++ b/src/blas/function_table.hpp
@@ -27,7 +27,7 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 typedef struct {
     int version;
@@ -251,53 +251,53 @@ typedef struct {
     void (*column_major_zswap_sycl)(sycl::queue& queue, std::int64_t n,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_sgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_sgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::int64_t kl,
                                     std::int64_t ku, float alpha, sycl::buffer<float, 1>& a,
                                     std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
                                     float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*column_major_dgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_dgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::int64_t kl,
                                     std::int64_t ku, double alpha, sycl::buffer<double, 1>& a,
                                     std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
                                     double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*column_major_cgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_cgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::int64_t kl,
                                     std::int64_t ku, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*column_major_zgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_zgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::int64_t kl,
                                     std::int64_t ku, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_sgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_sgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, float alpha,
                                     sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
                                     sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*column_major_dgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_dgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, double alpha,
                                     sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                     sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*column_major_cgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_cgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*column_major_zgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_zgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                     std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                   std::int64_t m, std::int64_t n, float alpha,
                                                   sycl::buffer<float, 1>& a, std::int64_t lda,
                                                   std::int64_t stridea, sycl::buffer<float, 1>& x,
@@ -305,7 +305,7 @@ typedef struct {
                                                   float beta, sycl::buffer<float, 1>& y,
                                                   std::int64_t incy, std::int64_t stridey,
                                                   std::int64_t batch_size);
-    void (*column_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                   std::int64_t m, std::int64_t n, double alpha,
                                                   sycl::buffer<double, 1>& a, std::int64_t lda,
                                                   std::int64_t stridea, sycl::buffer<double, 1>& x,
@@ -314,25 +314,25 @@ typedef struct {
                                                   std::int64_t incy, std::int64_t stridey,
                                                   std::int64_t batch_size);
     void (*column_major_cgemv_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
         std::int64_t stridex, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
         std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
     void (*column_major_zgemv_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
         std::int64_t stridex, std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
         std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-    void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
+    void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right,
                                                   std::int64_t m, std::int64_t n,
                                                   sycl::buffer<float, 1>& a, std::int64_t lda,
                                                   std::int64_t stridea, sycl::buffer<float, 1>& x,
                                                   std::int64_t incx, std::int64_t stridex,
                                                   sycl::buffer<float, 1>& c, std::int64_t ldc,
                                                   std::int64_t stridec, std::int64_t batch_size);
-    void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
+    void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right,
                                                   std::int64_t m, std::int64_t n,
                                                   sycl::buffer<double, 1>& a, std::int64_t lda,
                                                   std::int64_t stridea, sycl::buffer<double, 1>& x,
@@ -340,13 +340,13 @@ typedef struct {
                                                   sycl::buffer<double, 1>& c, std::int64_t ldc,
                                                   std::int64_t stridec, std::int64_t batch_size);
     void (*column_major_cdgmm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stridea,
         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size);
     void (*column_major_zdgmm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -379,736 +379,730 @@ typedef struct {
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-    void (*column_major_chbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_chbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::int64_t k, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*column_major_zhbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zhbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::int64_t k, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_chemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_chemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*column_major_zhemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zhemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_cher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_cher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, float alpha,
                                    sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                    sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-    void (*column_major_zher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, double alpha,
                                    sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                    sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-    void (*column_major_cher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_cher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda);
-    void (*column_major_zher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-    void (*column_major_chpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_chpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*column_major_zhpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zhpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*column_major_chpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_chpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, float alpha,
                                    sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                    sycl::buffer<std::complex<float>, 1>& a);
-    void (*column_major_zhpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zhpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, double alpha,
                                    sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                    sycl::buffer<std::complex<double>, 1>& a);
-    void (*column_major_chpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_chpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy,
                                     sycl::buffer<std::complex<float>, 1>& a);
-    void (*column_major_zhpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_zhpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                     sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                     sycl::buffer<std::complex<double>, 1>& a);
-    void (*column_major_ssbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_ssbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::int64_t k, float alpha,
                                     sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
                                     sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*column_major_dsbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dsbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, std::int64_t k, double alpha,
                                     sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                     sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*column_major_sspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_sspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
                                     sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
                                     sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*column_major_dspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
                                     sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                     sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*column_major_sspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_sspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
                                    std::int64_t incx, sycl::buffer<float, 1>& a);
-    void (*column_major_dspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
                                    std::int64_t incx, sycl::buffer<double, 1>& a);
-    void (*column_major_sspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_sspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
                                     std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
                                     sycl::buffer<float, 1>& a);
-    void (*column_major_dspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
                                     std::int64_t incx, sycl::buffer<double, 1>& y,
                                     std::int64_t incy, sycl::buffer<double, 1>& a);
-    void (*column_major_ssymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_ssymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
                                     std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
                                     float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*column_major_dsymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dsymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
                                     std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
                                     double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*column_major_ssyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_ssyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
                                    std::int64_t incx, sycl::buffer<float, 1>& a, std::int64_t lda);
-    void (*column_major_dsyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dsyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                    std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
                                    std::int64_t incx, sycl::buffer<double, 1>& a, std::int64_t lda);
-    void (*column_major_ssyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_ssyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, float alpha, sycl::buffer<float, 1>& x,
                                     std::int64_t incx, sycl::buffer<float, 1>& y, std::int64_t incy,
                                     sycl::buffer<float, 1>& a, std::int64_t lda);
-    void (*column_major_dsyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    void (*column_major_dsyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                     std::int64_t n, double alpha, sycl::buffer<double, 1>& x,
                                     std::int64_t incx, sycl::buffer<double, 1>& y,
                                     std::int64_t incy, sycl::buffer<double, 1>& a,
                                     std::int64_t lda);
-    void (*column_major_stbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_stbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
                                     std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
                                     std::int64_t lda, sycl::buffer<double, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_ctbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*column_major_ztbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*column_major_stbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_stbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
                                     std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
                                     std::int64_t lda, sycl::buffer<double, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_ctbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*column_major_ztbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, std::int64_t k,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*column_major_stpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_stpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<float, 1>& a,
                                     sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<double, 1>& a,
                                     sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*column_major_ctpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*column_major_ztpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*column_major_stpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_stpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<float, 1>& a,
                                     sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<double, 1>& a,
                                     sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*column_major_ctpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                     sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*column_major_ztpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                     sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*column_major_strmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_strmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*column_major_ctrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                     std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_ztrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                     std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_strsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_strsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*column_major_dtrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_dtrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*column_major_ctrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ctrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                     std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_ztrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*column_major_ztrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                     std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                     std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
                                     std::int64_t incx);
-    void (*column_major_sgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*column_major_sgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                                     std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb,
                                     float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_dgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*column_major_dgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                                     std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb,
                                     double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*column_major_cgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*column_major_cgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::int64_t k, std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*column_major_zgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::int64_t k, std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_hgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*column_major_hgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::int64_t k, sycl::half alpha,
                                     sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
                                     sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
                                     sycl::half beta, sycl::buffer<sycl::half, 1>& c,
                                     std::int64_t ldc);
-    void (*column_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                             oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                             oneapi::math::transpose transb, std::int64_t m,
                                              std::int64_t n, std::int64_t k, float alpha,
                                              sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
                                              sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
                                              float beta, sycl::buffer<float, 1>& c,
                                              std::int64_t ldc);
-    void (*column_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, float alpha,
-                                               sycl::buffer<oneapi::mkl::bfloat16, 1>& a,
+                                               sycl::buffer<oneapi::math::bfloat16, 1>& a,
                                                std::int64_t lda,
-                                               sycl::buffer<oneapi::mkl::bfloat16, 1>& b,
+                                               sycl::buffer<oneapi::math::bfloat16, 1>& b,
                                                std::int64_t ldb, float beta,
                                                sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_chemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_chemm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zhemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_zhemm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_cherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_cherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     float alpha, sycl::buffer<std::complex<float>, 1>& a,
                                     std::int64_t lda, float beta,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_zherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     double alpha, sycl::buffer<std::complex<double>, 1>& a,
                                     std::int64_t lda, double beta,
                                     sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_cher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_cher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      std::complex<float> alpha,
                                      sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                      float beta, sycl::buffer<std::complex<float>, 1>& c,
                                      std::int64_t ldc);
-    void (*column_major_zher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_zher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      std::complex<double> alpha,
                                      sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                      double beta, sycl::buffer<std::complex<double>, 1>& c,
                                      std::int64_t ldc);
-    void (*column_major_ssymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_ssymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                     sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_dsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_dsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                     sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*column_major_csymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_csymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*column_major_zsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_ssyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_ssyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_dsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_dsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*column_major_csyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_csyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                    oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_zsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                    oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     std::complex<double> beta,
                                     sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                  oneapi::mkl::transpose trans, std::int64_t n,
-                                                  std::int64_t k, float alpha,
-                                                  sycl::buffer<float, 1>& a, std::int64_t lda,
-                                                  std::int64_t stride_a, float beta,
-                                                  sycl::buffer<float, 1>& c, std::int64_t ldc,
-                                                  std::int64_t stride_c, std::int64_t batch_size);
-    void (*column_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                  oneapi::mkl::transpose trans, std::int64_t n,
-                                                  std::int64_t k, double alpha,
-                                                  sycl::buffer<double, 1>& a, std::int64_t lda,
-                                                  std::int64_t stride_a, double beta,
-                                                  sycl::buffer<double, 1>& c, std::int64_t ldc,
-                                                  std::int64_t stride_c, std::int64_t batch_size);
+    void (*column_major_ssyrk_batch_strided_sycl)(
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+        std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
+        std::int64_t stride_a, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
+        std::int64_t stride_c, std::int64_t batch_size);
+    void (*column_major_dsyrk_batch_strided_sycl)(
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+        std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
+        std::int64_t stride_a, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc,
+        std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_csyrk_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_zsyrk_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
-    void (*column_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                      sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                      sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                      sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                      sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*column_major_csyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_csyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      std::complex<float> alpha,
                                      sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                      std::complex<float> beta,
                                      sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*column_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                      std::complex<double> alpha,
                                      sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                      std::complex<double> beta,
                                      sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*column_major_strmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_strmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*column_major_dtrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_dtrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*column_major_ctrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_ctrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-    void (*column_major_ztrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_ztrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-    void (*column_major_strsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_strsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*column_major_dtrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_dtrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*column_major_ctrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_ctrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-    void (*column_major_ztrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                    oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                    oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*column_major_ztrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                    oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                    oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
     void (*column_major_sgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
         std::int64_t lda, std::int64_t stride_a, sycl::buffer<float, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_dgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
         std::int64_t lda, std::int64_t stride_a, sycl::buffer<double, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, double beta, sycl::buffer<double, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_cgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_zgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
-    void (*column_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                                  oneapi::mkl::transpose transb, std::int64_t m,
-                                                  std::int64_t n, std::int64_t k, sycl::half alpha,
-                                                  sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
-                                                  std::int64_t stride_a,
-                                                  sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
-                                                  std::int64_t stride_b, sycl::half beta,
-                                                  sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
-                                                  std::int64_t stride_c, std::int64_t batch_size);
+    void (*column_major_hgemm_batch_strided_sycl)(
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha,
+        sycl::buffer<sycl::half, 1>& a, std::int64_t lda, std::int64_t stride_a,
+        sycl::buffer<sycl::half, 1>& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta,
+        sycl::buffer<sycl::half, 1>& c, std::int64_t ldc, std::int64_t stride_c,
+        std::int64_t batch_size);
     void (*column_major_gemm_f16f16f32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
         std::int64_t lda, std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_gemm_s8s8f32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
         sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*column_major_gemm_s8s8s32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
         sycl::buffer<int32_t, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*column_major_strsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*column_major_dtrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*column_major_ctrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     void (*column_major_ztrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
-    void (*column_major_sgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*column_major_sgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose transa, oneapi::math::transpose transb,
                                      std::int64_t n, std::int64_t k, float alpha,
                                      sycl::buffer<float, 1>& a, std::int64_t lda,
                                      sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                      sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*column_major_dgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*column_major_dgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose transa, oneapi::math::transpose transb,
                                      std::int64_t n, std::int64_t k, double alpha,
                                      sycl::buffer<double, 1>& a, std::int64_t lda,
                                      sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                      sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*column_major_cgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*column_major_cgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose transa, oneapi::math::transpose transb,
                                      std::int64_t n, std::int64_t k, std::complex<float> alpha,
                                      sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                      std::complex<float> beta,
                                      sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                     oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*column_major_zgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                     oneapi::math::transpose transa, oneapi::math::transpose transb,
                                      std::int64_t n, std::int64_t k, std::complex<double> alpha,
                                      sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                      std::complex<double> beta,
                                      sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
     void (*column_major_gemm_s8u8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
         std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*column_major_gemm_s8s8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
         std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*column_major_gemm_u8s8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao, sycl::buffer<int8_t, 1>& b,
         std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*column_major_gemm_u8u8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao, sycl::buffer<uint8_t, 1>& b,
         std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*column_major_somatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*column_major_domatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*column_major_comatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     void (*column_major_zomatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     void (*column_major_simatcopy_batch_strided_sycl)(sycl::queue& queue,
-                                                      oneapi::mkl::transpose trans, std::int64_t m,
+                                                      oneapi::math::transpose trans, std::int64_t m,
                                                       std::int64_t n, float alpha,
                                                       sycl::buffer<float, 1>& ab, std::int64_t lda,
                                                       std::int64_t ldb, std::int64_t stride,
                                                       std::int64_t batch_size);
     void (*column_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue,
-                                                      oneapi::mkl::transpose trans, std::int64_t m,
+                                                      oneapi::math::transpose trans, std::int64_t m,
                                                       std::int64_t n, double alpha,
                                                       sycl::buffer<double, 1>& ab, std::int64_t lda,
                                                       std::int64_t ldb, std::int64_t stride,
                                                       std::int64_t batch_size);
     void (*column_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue,
-                                                      oneapi::mkl::transpose trans, std::int64_t m,
+                                                      oneapi::math::transpose trans, std::int64_t m,
                                                       std::int64_t n, std::complex<float> alpha,
                                                       sycl::buffer<std::complex<float>, 1>& ab,
                                                       std::int64_t lda, std::int64_t ldb,
                                                       std::int64_t stride, std::int64_t batch_size);
     void (*column_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue,
-                                                      oneapi::mkl::transpose trans, std::int64_t m,
+                                                      oneapi::math::transpose trans, std::int64_t m,
                                                       std::int64_t n, std::complex<double> alpha,
                                                       sycl::buffer<std::complex<double>, 1>& ab,
                                                       std::int64_t lda, std::int64_t ldb,
                                                       std::int64_t stride, std::int64_t batch_size);
     void (*column_major_somatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
         std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*column_major_domatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
         std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*column_major_comatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*column_major_zomatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
 
-    void (*column_major_somatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_somatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, float alpha,
                                         sycl::buffer<float, 1>& a, std::int64_t lda,
                                         sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*column_major_domatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_domatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, double alpha,
                                         sycl::buffer<double, 1>& a, std::int64_t lda,
                                         sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*column_major_comatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_comatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-    void (*column_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-    void (*column_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t m, std::int64_t n, float alpha,
                                          sycl::buffer<float, 1>& a, std::int64_t lda,
                                          std::int64_t stridea, sycl::buffer<float, 1>& b,
                                          std::int64_t ldb, std::int64_t strideb);
-    void (*column_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t m, std::int64_t n, double alpha,
                                          sycl::buffer<double, 1>& a, std::int64_t lda,
                                          std::int64_t stridea, sycl::buffer<double, 1>& b,
                                          std::int64_t ldb, std::int64_t strideb);
-    void (*column_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                          sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                          std::int64_t stridea,
                                          sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                          std::int64_t strideb);
-    void (*column_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                          sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                          std::int64_t stridea,
                                          sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                          std::int64_t strideb);
-    void (*column_major_simatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_simatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, float alpha,
                                         sycl::buffer<float, 1>& ab, std::int64_t lda,
                                         std::int64_t ldb);
-    void (*column_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, double alpha,
                                         sycl::buffer<double, 1>& ab, std::int64_t lda,
                                         std::int64_t ldb);
-    void (*column_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                         sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
                                         std::int64_t ldb);
-    void (*column_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*column_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                         std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                         sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
                                         std::int64_t ldb);
-    void (*column_major_somatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                       oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_somatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                       oneapi::math::transpose transb, std::int64_t m,
                                        std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
                                        std::int64_t lda, float beta, sycl::buffer<float, 1>& b,
                                        std::int64_t ldb, sycl::buffer<float, 1>& c,
                                        std::int64_t ldc);
-    void (*column_major_domatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                       oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_domatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                       oneapi::math::transpose transb, std::int64_t m,
                                        std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
                                        std::int64_t lda, double beta, sycl::buffer<double, 1>& b,
                                        std::int64_t ldb, sycl::buffer<double, 1>& c,
                                        std::int64_t ldc);
-    void (*column_major_comatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                       oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_comatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                       oneapi::math::transpose transb, std::int64_t m,
                                        std::int64_t n, std::complex<float> alpha,
                                        sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                        std::complex<float> beta,
                                        sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                        sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*column_major_zomatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                       oneapi::mkl::transpose transb, std::int64_t m,
+    void (*column_major_zomatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                       oneapi::math::transpose transb, std::int64_t m,
                                        std::int64_t n, std::complex<double> alpha,
                                        sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                        std::complex<double> beta,
@@ -1409,24 +1403,24 @@ typedef struct {
                                                std::complex<double>* x, std::int64_t incx,
                                                std::complex<double>* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, std::int64_t kl,
                                                std::int64_t ku, float alpha, const float* a,
                                                std::int64_t lda, const float* x, std::int64_t incx,
                                                float beta, float* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, std::int64_t kl,
                                                std::int64_t ku, double alpha, const double* a,
                                                std::int64_t lda, const double* x, std::int64_t incx,
                                                double beta, double* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgbmv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::int64_t kl, std::int64_t ku, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
         std::complex<float>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, std::int64_t kl,
                                                std::int64_t ku, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
@@ -1434,114 +1428,114 @@ typedef struct {
                                                std::complex<double> beta, std::complex<double>* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, float alpha,
                                                const float* a, std::int64_t lda, const float* x,
                                                std::int64_t incx, float beta, float* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, double alpha,
                                                const double* a, std::int64_t lda, const double* x,
                                                std::int64_t incx, double beta, double* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
         std::complex<float>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
         std::complex<double>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x,
         std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x,
         std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stridea, const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<float> beta, std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stridea, const std::complex<double>* x, std::int64_t incx,
         std::int64_t stridex, std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx,
         float* beta, float** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx,
         double* beta, double** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
         const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
         std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, const std::complex<double>** a, std::int64_t* lda,
         const std::complex<double>** x, std::int64_t* incx, std::complex<double>* beta,
         std::complex<double>** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx,
         std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ddgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx,
         std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
         const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<float>* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
         const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<double>* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ddgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const std::complex<float>** a, std::int64_t* lda, const std::complex<float>** x,
         std::int64_t* incx, std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const std::complex<double>** a, std::int64_t* lda, const std::complex<double>** x,
         std::int64_t* incx, std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
@@ -1580,932 +1574,935 @@ typedef struct {
                                                std::complex<double>* a, std::int64_t lda,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_chbmv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
         std::complex<float>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zhbmv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
         std::complex<double>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<float> alpha,
                                                const std::complex<float>* a, std::int64_t lda,
                                                const std::complex<float>* x, std::int64_t incx,
                                                std::complex<float> beta, std::complex<float>* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                const std::complex<double>* x, std::int64_t incx,
                                                std::complex<double> beta, std::complex<double>* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_cher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_cher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, float alpha,
                                               const std::complex<float>* x, std::int64_t incx,
                                               std::complex<float>* a, std::int64_t lda,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, double alpha,
                                               const std::complex<double>* x, std::int64_t incx,
                                               std::complex<double>* a, std::int64_t lda,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<float> alpha,
                                                const std::complex<float>* x, std::int64_t incx,
                                                const std::complex<float>* y, std::int64_t incy,
                                                std::complex<float>* a, std::int64_t lda,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* x, std::int64_t incx,
                                                const std::complex<double>* y, std::int64_t incy,
                                                std::complex<double>* a, std::int64_t lda,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<float> alpha,
                                                const std::complex<float>* a,
                                                const std::complex<float>* x, std::int64_t incx,
                                                std::complex<float> beta, std::complex<float>* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* a,
                                                const std::complex<double>* x, std::int64_t incx,
                                                std::complex<double> beta, std::complex<double>* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, float alpha,
                                               const std::complex<float>* x, std::int64_t incx,
                                               std::complex<float>* a,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, double alpha,
                                               const std::complex<double>* x, std::int64_t incx,
                                               std::complex<double>* a,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<float> alpha,
                                                const std::complex<float>* x, std::int64_t incx,
                                                const std::complex<float>* y, std::int64_t incy,
                                                std::complex<float>* a,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* x, std::int64_t incx,
                                                const std::complex<double>* y, std::int64_t incy,
                                                std::complex<double>* a,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::int64_t k, float alpha,
                                                const float* a, std::int64_t lda, const float* x,
                                                std::int64_t incx, float beta, float* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, std::int64_t k, double alpha,
                                                const double* a, std::int64_t lda, const double* x,
                                                std::int64_t incx, double beta, double* y,
                                                std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, float alpha, const float* a,
                                                const float* x, std::int64_t incx, float beta,
                                                float* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, double alpha, const double* a,
                                                const double* x, std::int64_t incx, double beta,
                                                double* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, float alpha, const float* x,
                                               std::int64_t incx, float* a,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, double alpha, const double* x,
                                               std::int64_t incx, double* a,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, float alpha, const float* x,
                                                std::int64_t incx, const float* y, std::int64_t incy,
                                                float* a,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, double alpha, const double* x,
                                                std::int64_t incx, const double* y,
                                                std::int64_t incy, double* a,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, float alpha, const float* a,
                                                std::int64_t lda, const float* x, std::int64_t incx,
                                                float beta, float* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, double alpha, const double* a,
                                                std::int64_t lda, const double* x, std::int64_t incx,
                                                double beta, double* y, std::int64_t incy,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, float alpha, const float* x,
                                               std::int64_t incx, float* a, std::int64_t lda,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                               std::int64_t n, double alpha, const double* x,
                                               std::int64_t incx, double* a, std::int64_t lda,
                                               const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, float alpha, const float* x,
                                                std::int64_t incx, const float* y, std::int64_t incy,
                                                float* a, std::int64_t lda,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                                std::int64_t n, double alpha, const double* x,
                                                std::int64_t incx, const double* y,
                                                std::int64_t incy, double* a, std::int64_t lda,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const float* a, std::int64_t lda,
                                                float* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const double* a, std::int64_t lda,
                                                double* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const std::complex<float>* a,
                                                std::int64_t lda, std::complex<float>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const std::complex<double>* a,
                                                std::int64_t lda, std::complex<double>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const float* a, std::int64_t lda,
                                                float* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const double* a, std::int64_t lda,
                                                double* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const std::complex<float>* a,
                                                std::int64_t lda, std::complex<float>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                std::int64_t k, const std::complex<double>* a,
                                                std::int64_t lda, std::complex<double>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const float* a, float* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const double* a, double* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<float>* a, std::complex<float>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<double>* a,
                                                std::complex<double>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const float* a, float* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const double* a, double* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<float>* a, std::complex<float>* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<double>* a,
                                                std::complex<double>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const float* a, std::int64_t lda, float* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const double* a, std::int64_t lda, double* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<float>* a, std::int64_t lda,
                                                std::complex<float>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<double>* a, std::int64_t lda,
                                                std::complex<double>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const float* a, std::int64_t lda, float* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const double* a, std::int64_t lda, double* x,
                                                std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<float>* a, std::int64_t lda,
                                                std::complex<float>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t n,
                                                const std::complex<double>* a, std::int64_t lda,
                                                std::complex<double>* x, std::int64_t incx,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, float alpha,
                                                const float* a, std::int64_t lda, const float* b,
                                                std::int64_t ldb, float beta, float* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, double alpha,
                                                const double* a, std::int64_t lda, const double* b,
                                                std::int64_t ldb, double beta, double* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
         std::int64_t ldb, std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
         std::int64_t ldb, std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, sycl::half alpha,
                                                const sycl::half* a, std::int64_t lda,
                                                const sycl::half* b, std::int64_t ldb,
                                                sycl::half beta, sycl::half* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_f16f16f32_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
         std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
         std::int64_t ldc, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_bf16bf16f32_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16* a,
-        std::int64_t lda, const oneapi::mkl::bfloat16* b, std::int64_t ldb, float beta, float* c,
-        std::int64_t ldc, const std::vector<sycl::event>& dependencies);
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b,
+        std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+        const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_chemm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower, std::int64_t m,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                const std::complex<double>* b, std::int64_t ldb,
                                                std::complex<double> beta, std::complex<double>* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, float alpha,
                                                const std::complex<float>* a, std::int64_t lda,
                                                float beta, std::complex<float>* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, double alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                double beta, std::complex<double>* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cher2k_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, float beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zher2k_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
         std::int64_t lda, const std::complex<double>* b, std::int64_t ldb, double beta,
         std::complex<double>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower, std::int64_t m,
                                                std::int64_t n, float alpha, const float* a,
                                                std::int64_t lda, const float* b, std::int64_t ldb,
                                                float beta, float* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower, std::int64_t m,
                                                std::int64_t n, double alpha, const double* a,
                                                std::int64_t lda, const double* b, std::int64_t ldb,
                                                double beta, double* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_csymm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower, std::int64_t m,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                const std::complex<double>* b, std::int64_t ldb,
                                                std::complex<double> beta, std::complex<double>* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, float alpha, const float* a,
                                                std::int64_t lda, float beta, float* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, double alpha, const double* a,
                                                std::int64_t lda, double beta, double* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, std::complex<float> alpha,
                                                const std::complex<float>* a, std::int64_t lda,
                                                std::complex<float> beta, std::complex<float>* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                std::complex<double> beta, std::complex<double>* c,
                                                std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ssyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda,
         float* beta, float** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dsyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda,
         double* beta, double** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_csyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, std::complex<float>* alpha, const std::complex<float>** a,
         std::int64_t* lda, std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zsyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
         const std::complex<double>** a, std::int64_t* lda, std::complex<double>* beta,
         std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ssyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
         std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dsyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
         std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_csyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, std::complex<float>* c,
         std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zsyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<double> beta, std::complex<double>* c,
         std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                                oneapi::math::transpose trans, std::int64_t n,
                                                 std::int64_t k, float alpha, const float* a,
                                                 std::int64_t lda, const float* b, std::int64_t ldb,
                                                 float beta, float* c, std::int64_t ldc,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                                oneapi::math::transpose trans, std::int64_t n,
                                                 std::int64_t k, double alpha, const double* a,
                                                 std::int64_t lda, const double* b, std::int64_t ldb,
                                                 double beta, double* c, std::int64_t ldc,
                                                 const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_csyr2k_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                                oneapi::math::transpose trans, std::int64_t n,
                                                 std::int64_t k, std::complex<double> alpha,
                                                 const std::complex<double>* a, std::int64_t lda,
                                                 const std::complex<double>* b, std::int64_t ldb,
                                                 std::complex<double> beta, std::complex<double>* c,
                                                 std::int64_t ldc,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*column_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t m,
                                                std::int64_t n, float alpha, const float* a,
                                                std::int64_t lda, float* b, std::int64_t ldb,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t m,
                                                std::int64_t n, double alpha, const double* a,
                                                std::int64_t lda, double* b, std::int64_t ldb,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ctrmm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::complex<float>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ztrmm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::complex<double>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*column_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t m,
                                                std::int64_t n, float alpha, const float* a,
                                                std::int64_t lda, float* b, std::int64_t ldb,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                               oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans,
-                                               oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                               oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans,
+                                               oneapi::math::diag unit_diag, std::int64_t m,
                                                std::int64_t n, double alpha, const double* a,
                                                std::int64_t lda, double* b, std::int64_t ldb,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ctrsm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::complex<float>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ztrsm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::complex<double>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_strsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dtrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ctrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ztrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_strsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dtrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ctrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, std::complex<float>* alpha, const std::complex<float>** a,
         std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_ztrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, std::complex<double>* alpha, const std::complex<double>** a,
         std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a,
         std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a,
         std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
         const std::complex<float>** a, std::int64_t* lda, const std::complex<float>** b,
         std::int64_t* ldb, std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
         const std::complex<double>** a, std::int64_t* lda, const std::complex<double>** b,
         std::int64_t* ldb, std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_hgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a,
         std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta,
         sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_f16f16f32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a,
         std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8s8f32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a,
         std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8s8s32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a,
         std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_sgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a,
         std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a,
         std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb,
         std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
         const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float> beta, std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
         const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_hgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a,
         std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
         std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_f16f16f32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
         std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8s8f32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
         std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8s8s32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
         std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                oneapi::mkl::transpose transa,
-                                                oneapi::mkl::transpose transb, std::int64_t n,
+    sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                                oneapi::math::transpose transa,
+                                                oneapi::math::transpose transb, std::int64_t n,
                                                 std::int64_t k, float alpha, const float* a,
                                                 std::int64_t lda, const float* b, std::int64_t ldb,
                                                 float beta, float* c, std::int64_t ldc,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                                oneapi::mkl::transpose transa,
-                                                oneapi::mkl::transpose transb, std::int64_t n,
+    sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                                oneapi::math::transpose transa,
+                                                oneapi::math::transpose transb, std::int64_t n,
                                                 std::int64_t k, double alpha, const double* a,
                                                 std::int64_t lda, const double* b, std::int64_t ldb,
                                                 double beta, double* c, std::int64_t ldc,
                                                 const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cgemmt_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-        oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+        oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
         std::int64_t ldb, std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zgemmt_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-        oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+        oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
         std::int64_t ldb, std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8u8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b,
         std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_s8s8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b,
         std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_u8s8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b,
         std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_gemm_u8u8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b,
         std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_somatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_domatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_comatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zomatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_simatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
         std::int64_t stride, std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
         std::int64_t stride, std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_somatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
         std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b,
         float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_domatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
         std::int64_t stride_a, double beta, const double* b, std::int64_t ldb,
         std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_comatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
         const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zomatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
         const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
 
-    sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, float alpha,
-                                                   const float* a, std::int64_t lda, float* b,
-                                                   std::int64_t ldb,
+    sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, float alpha, const float* a,
+                                                   std::int64_t lda, float* b, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, double alpha,
-                                                   const double* a, std::int64_t lda, double* b,
-                                                   std::int64_t ldb,
+    sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, double alpha, const double* a,
+                                                   std::int64_t lda, double* b, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<float> alpha,
+    sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<float> alpha,
                                                    const std::complex<float>* a, std::int64_t lda,
                                                    std::complex<float>* b, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<double> alpha,
+    sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<double> alpha,
                                                    const std::complex<double>* a, std::int64_t lda,
                                                    std::complex<double>* b, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_somatcopy2_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stridea, float* b,
         std::int64_t ldb, std::int64_t strideb, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_domatcopy2_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stridea, double* b,
         std::int64_t ldb, std::int64_t strideb, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_comatcopy2_usm_sycl)(sycl::queue& queue,
-                                                    oneapi::mkl::transpose trans, std::int64_t m,
+                                                    oneapi::math::transpose trans, std::int64_t m,
                                                     std::int64_t n, std::complex<float> alpha,
                                                     const std::complex<float>* a, std::int64_t lda,
                                                     std::int64_t stridea, std::complex<float>* b,
                                                     std::int64_t ldb, std::int64_t strideb,
                                                     const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zomatcopy2_usm_sycl)(sycl::queue& queue,
-                                                    oneapi::mkl::transpose trans, std::int64_t m,
+                                                    oneapi::math::transpose trans, std::int64_t m,
                                                     std::int64_t n, std::complex<double> alpha,
                                                     const std::complex<double>* a, std::int64_t lda,
                                                     std::int64_t stridea, std::complex<double>* b,
                                                     std::int64_t ldb, std::int64_t strideb,
                                                     const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, float alpha,
-                                                   float* ab, std::int64_t lda, std::int64_t ldb,
+    sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, float alpha, float* ab,
+                                                   std::int64_t lda, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, double alpha,
-                                                   double* ab, std::int64_t lda, std::int64_t ldb,
+    sycl::event (*column_major_dimatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, double alpha, double* ab,
+                                                   std::int64_t lda, std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<float> alpha,
+    sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<float> alpha,
                                                    std::complex<float>* ab, std::int64_t lda,
                                                    std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<double> alpha,
+    sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<double> alpha,
                                                    std::complex<double>* ab, std::int64_t lda,
                                                    std::int64_t ldb,
                                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                                  oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue& queue,
+                                                  oneapi::math::transpose transa,
+                                                  oneapi::math::transpose transb, std::int64_t m,
                                                   std::int64_t n, float alpha, const float* a,
                                                   std::int64_t lda, float beta, const float* b,
                                                   std::int64_t ldb, float* c, std::int64_t ldc,
                                                   const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                                  oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue& queue,
+                                                  oneapi::math::transpose transa,
+                                                  oneapi::math::transpose transb, std::int64_t m,
                                                   std::int64_t n, double alpha, const double* a,
                                                   std::int64_t lda, double beta, const double* b,
                                                   std::int64_t ldb, double* c, std::int64_t ldc,
                                                   const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_comatadd_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::complex<float> beta, const std::complex<float>* b, std::int64_t ldb,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*column_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                                  oneapi::mkl::transpose transb, std::int64_t m,
-                                                  std::int64_t n, std::complex<double> alpha,
-                                                  const std::complex<double>* a, std::int64_t lda,
-                                                  std::complex<double> beta,
-                                                  const std::complex<double>* b, std::int64_t ldb,
-                                                  std::complex<double>* c, std::int64_t ldc,
-                                                  const std::vector<sycl::event>& dependencies);
+    sycl::event (*column_major_zomatadd_usm_sycl)(
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
+        std::int64_t lda, std::complex<double> beta, const std::complex<double>* b,
+        std::int64_t ldb, std::complex<double>* c, std::int64_t ldc,
+        const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_somatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_domatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_comatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
         std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zomatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, const std::complex<double>** a, std::int64_t* lda,
         std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_simatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_dimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_cimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*column_major_zimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, std::complex<double>** ab, std::int64_t* lda,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
@@ -2724,57 +2721,57 @@ typedef struct {
     void (*row_major_zswap_sycl)(sycl::queue& queue, std::int64_t n,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_sgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_sgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha,
                                  sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
                                  sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*row_major_dgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_dgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha,
                                  sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                  sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*row_major_cgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_cgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::int64_t kl, std::int64_t ku,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx, std::complex<float> beta,
                                  sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*row_major_zgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_zgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::int64_t kl, std::int64_t ku,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_sgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_sgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, float alpha, sycl::buffer<float, 1>& a,
                                  std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
                                  float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*row_major_dgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_dgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, double alpha, sycl::buffer<double, 1>& a,
                                  std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
                                  double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*row_major_cgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_cgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::complex<float> alpha,
                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                  std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
                                  std::int64_t incy);
-    void (*row_major_zgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m,
+    void (*row_major_zgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m,
                                  std::int64_t n, std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, float alpha,
                                                sycl::buffer<float, 1>& a, std::int64_t lda,
                                                std::int64_t stridea, sycl::buffer<float, 1>& x,
                                                std::int64_t incx, std::int64_t stridex, float beta,
                                                sycl::buffer<float, 1>& y, std::int64_t incy,
                                                std::int64_t stridey, std::int64_t batch_size);
-    void (*row_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                std::int64_t m, std::int64_t n, double alpha,
                                                sycl::buffer<double, 1>& a, std::int64_t lda,
                                                std::int64_t stridea, sycl::buffer<double, 1>& x,
@@ -2782,25 +2779,25 @@ typedef struct {
                                                sycl::buffer<double, 1>& y, std::int64_t incy,
                                                std::int64_t stridey, std::int64_t batch_size);
     void (*row_major_cgemv_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
         std::int64_t stridex, std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
         std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
     void (*row_major_zgemv_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stridea, sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
         std::int64_t stridex, std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& y,
         std::int64_t incy, std::int64_t stridey, std::int64_t batch_size);
-    void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
+    void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right,
                                                std::int64_t m, std::int64_t n,
                                                sycl::buffer<float, 1>& a, std::int64_t lda,
                                                std::int64_t stridea, sycl::buffer<float, 1>& x,
                                                std::int64_t incx, std::int64_t stridex,
                                                sycl::buffer<float, 1>& c, std::int64_t ldc,
                                                std::int64_t stridec, std::int64_t batch_size);
-    void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
+    void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right,
                                                std::int64_t m, std::int64_t n,
                                                sycl::buffer<double, 1>& a, std::int64_t lda,
                                                std::int64_t stridea, sycl::buffer<double, 1>& x,
@@ -2808,13 +2805,13 @@ typedef struct {
                                                sycl::buffer<double, 1>& c, std::int64_t ldc,
                                                std::int64_t stridec, std::int64_t batch_size);
     void (*row_major_cdgmm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stridea,
         sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx, std::int64_t stridex,
         sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size);
     void (*row_major_zdgmm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stridea,
         sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx, std::int64_t stridex,
         sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc, std::int64_t stridec,
@@ -2847,455 +2844,455 @@ typedef struct {
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-    void (*row_major_chbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_chbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::int64_t k, std::complex<float> alpha,
                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                  std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
                                  std::int64_t incy);
-    void (*row_major_zhbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zhbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::int64_t k, std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_chemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_chemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx, std::complex<float> beta,
                                  sycl::buffer<std::complex<float>, 1>& y, std::int64_t incy);
-    void (*row_major_zhemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zhemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_cher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_cher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 float alpha, sycl::buffer<std::complex<float>, 1>& x,
                                 std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a,
                                 std::int64_t lda);
-    void (*row_major_zher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 double alpha, sycl::buffer<std::complex<double>, 1>& x,
                                 std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a,
                                 std::int64_t lda);
-    void (*row_major_cher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_cher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
                                  std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda);
-    void (*row_major_zher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda);
-    void (*row_major_chpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_chpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx,
                                  std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& y,
                                  std::int64_t incy);
-    void (*row_major_zhpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zhpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy);
-    void (*row_major_chpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_chpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 float alpha, sycl::buffer<std::complex<float>, 1>& x,
                                 std::int64_t incx, sycl::buffer<std::complex<float>, 1>& a);
-    void (*row_major_zhpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zhpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 double alpha, sycl::buffer<std::complex<double>, 1>& x,
                                 std::int64_t incx, sycl::buffer<std::complex<double>, 1>& a);
-    void (*row_major_chpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_chpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx, sycl::buffer<std::complex<float>, 1>& y,
                                  std::int64_t incy, sycl::buffer<std::complex<float>, 1>& a);
-    void (*row_major_zhpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_zhpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx,
                                  sycl::buffer<std::complex<double>, 1>& y, std::int64_t incy,
                                  sycl::buffer<std::complex<double>, 1>& a);
-    void (*row_major_ssbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_ssbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                                  std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx,
                                  float beta, sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*row_major_dsbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dsbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                                  std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx,
                                  double beta, sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*row_major_sspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_sspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& a, sycl::buffer<float, 1>& x,
                                  std::int64_t incx, float beta, sycl::buffer<float, 1>& y,
                                  std::int64_t incy);
-    void (*row_major_dspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& a,
                                  sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                  sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*row_major_sspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_sspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
                                 sycl::buffer<float, 1>& a);
-    void (*row_major_dspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
                                 sycl::buffer<double, 1>& a);
-    void (*row_major_sspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_sspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
                                  sycl::buffer<float, 1>& y, std::int64_t incy,
                                  sycl::buffer<float, 1>& a);
-    void (*row_major_dspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
                                  sycl::buffer<double, 1>& y, std::int64_t incy,
                                  sycl::buffer<double, 1>& a);
-    void (*row_major_ssymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_ssymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& x, std::int64_t incx, float beta,
                                  sycl::buffer<float, 1>& y, std::int64_t incy);
-    void (*row_major_dsymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dsymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& x, std::int64_t incx, double beta,
                                  sycl::buffer<double, 1>& y, std::int64_t incy);
-    void (*row_major_ssyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_ssyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
                                 sycl::buffer<float, 1>& a, std::int64_t lda);
-    void (*row_major_dsyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dsyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                 double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
                                 sycl::buffer<double, 1>& a, std::int64_t lda);
-    void (*row_major_ssyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_ssyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& x, std::int64_t incx,
                                  sycl::buffer<float, 1>& y, std::int64_t incy,
                                  sycl::buffer<float, 1>& a, std::int64_t lda);
-    void (*row_major_dsyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n,
+    void (*row_major_dsyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& x, std::int64_t incx,
                                  sycl::buffer<double, 1>& y, std::int64_t incy,
                                  sycl::buffer<double, 1>& a, std::int64_t lda);
-    void (*row_major_stbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_stbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
                                  std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
                                  std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k,
                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*row_major_ztbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*row_major_stbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_stbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k, sycl::buffer<float, 1>& a,
                                  std::int64_t lda, sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k, sycl::buffer<double, 1>& a,
                                  std::int64_t lda, sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k,
                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*row_major_ztbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, std::int64_t k,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*row_major_stpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_stpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<float, 1>& a,
                                  sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<double, 1>& a,
                                  sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*row_major_ztpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*row_major_stpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_stpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<float, 1>& a,
                                  sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<double, 1>& a,
                                  sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                  sycl::buffer<std::complex<float>, 1>& x, std::int64_t incx);
-    void (*row_major_ztpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                  sycl::buffer<std::complex<double>, 1>& x, std::int64_t incx);
-    void (*row_major_strmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_strmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx);
-    void (*row_major_ztrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
                                  std::int64_t incx);
-    void (*row_major_strsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_strsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& x, std::int64_t incx);
-    void (*row_major_dtrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_dtrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& x, std::int64_t incx);
-    void (*row_major_ctrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ctrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& x,
                                  std::int64_t incx);
-    void (*row_major_ztrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag,
+    void (*row_major_ztrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, oneapi::math::diag unit_diag,
                                  std::int64_t n, sycl::buffer<std::complex<double>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<double>, 1>& x,
                                  std::int64_t incx);
-    void (*row_major_sgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_sgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                 oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                  std::int64_t k, float alpha, sycl::buffer<float, 1>& a,
                                  std::int64_t lda, sycl::buffer<float, 1>& b, std::int64_t ldb,
                                  float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_dgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_dgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                 oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                  std::int64_t k, double alpha, sycl::buffer<double, 1>& a,
                                  std::int64_t lda, sycl::buffer<double, 1>& b, std::int64_t ldb,
                                  double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_cgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_cgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                 oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                  std::int64_t k, std::complex<float> alpha,
                                  sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                  std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
                                  std::int64_t ldc);
-    void (*row_major_zgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_zgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                 oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                  std::int64_t k, std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_hgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                 oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_hgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                 oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                  std::int64_t k, sycl::half alpha, sycl::buffer<sycl::half, 1>& a,
                                  std::int64_t lda, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
                                  sycl::half beta, sycl::buffer<sycl::half, 1>& c, std::int64_t ldc);
-    void (*row_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                          oneapi::mkl::transpose transb, std::int64_t m,
+    void (*row_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                          oneapi::math::transpose transb, std::int64_t m,
                                           std::int64_t n, std::int64_t k, float alpha,
                                           sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
                                           sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
                                           float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                            oneapi::mkl::transpose transb, std::int64_t m,
+    void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                            oneapi::math::transpose transb, std::int64_t m,
                                             std::int64_t n, std::int64_t k, float alpha,
-                                            sycl::buffer<oneapi::mkl::bfloat16, 1>& a,
+                                            sycl::buffer<oneapi::math::bfloat16, 1>& a,
                                             std::int64_t lda,
-                                            sycl::buffer<oneapi::mkl::bfloat16, 1>& b,
+                                            sycl::buffer<oneapi::math::bfloat16, 1>& b,
                                             std::int64_t ldb, float beta, sycl::buffer<float, 1>& c,
                                             std::int64_t ldc);
-    void (*row_major_chemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_chemm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
                                  std::int64_t ldb, std::complex<float> beta,
                                  sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*row_major_zhemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_zhemm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_cherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_cherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  float alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, float beta,
                                  sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*row_major_zherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_zherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  double alpha, sycl::buffer<std::complex<double>, 1>& a,
                                  std::int64_t lda, double beta,
                                  sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_cher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_cher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   std::complex<float> alpha,
                                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                   float beta, sycl::buffer<std::complex<float>, 1>& c,
                                   std::int64_t ldc);
-    void (*row_major_zher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_zher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   std::complex<double> alpha,
                                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                   double beta, sycl::buffer<std::complex<double>, 1>& c,
                                   std::int64_t ldc);
-    void (*row_major_ssymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_ssymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                  sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_dsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_dsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                  sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_csymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_csymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
                                  std::int64_t ldb, std::complex<float> beta,
                                  sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*row_major_zsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n,
+    void (*row_major_zsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_ssyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_ssyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  float beta, sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_dsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_dsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  double beta, sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_csyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_csyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, std::complex<float> beta,
                                  sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*row_major_zsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                 oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_zsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                 oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  std::complex<double> beta,
                                  sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, float alpha,
                                                sycl::buffer<float, 1>& a, std::int64_t lda,
                                                std::int64_t stride_a, float beta,
                                                sycl::buffer<float, 1>& c, std::int64_t ldc,
                                                std::int64_t stride_c, std::int64_t batch_size);
-    void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                               oneapi::mkl::transpose trans, std::int64_t n,
+    void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                               oneapi::math::transpose trans, std::int64_t n,
                                                std::int64_t k, double alpha,
                                                sycl::buffer<double, 1>& a, std::int64_t lda,
                                                std::int64_t stride_a, double beta,
                                                sycl::buffer<double, 1>& c, std::int64_t ldc,
                                                std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_csyrk_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_zsyrk_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
-    void (*row_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                   sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                   sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                   sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                   sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_csyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_csyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   std::complex<float> alpha,
                                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                   std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
                                   std::int64_t ldc);
-    void (*row_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k,
+    void (*row_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose trans, std::int64_t n, std::int64_t k,
                                   std::complex<double> alpha,
                                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                   std::complex<double> beta,
                                   sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
-    void (*row_major_strmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_strmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*row_major_dtrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_dtrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*row_major_ctrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_ctrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
                                  std::int64_t ldb);
-    void (*row_major_ztrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_ztrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-    void (*row_major_strsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_strsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                  sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*row_major_dtrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_dtrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                  sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*row_major_ctrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_ctrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a,
                                  std::int64_t lda, sycl::buffer<std::complex<float>, 1>& b,
                                  std::int64_t ldb);
-    void (*row_major_ztrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                 oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
-                                 oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+    void (*row_major_ztrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                 oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
+                                 oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
                                  std::complex<double> alpha,
                                  sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                  sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-    void (*row_major_sgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    void (*row_major_sgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, float alpha,
                                                sycl::buffer<float, 1>& a, std::int64_t lda,
                                                std::int64_t stride_a, sycl::buffer<float, 1>& b,
                                                std::int64_t ldb, std::int64_t stride_b, float beta,
                                                sycl::buffer<float, 1>& c, std::int64_t ldc,
                                                std::int64_t stride_c, std::int64_t batch_size);
-    void (*row_major_dgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    void (*row_major_dgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, double alpha,
                                                sycl::buffer<double, 1>& a, std::int64_t lda,
                                                std::int64_t stride_a, sycl::buffer<double, 1>& b,
@@ -3303,21 +3300,21 @@ typedef struct {
                                                sycl::buffer<double, 1>& c, std::int64_t ldc,
                                                std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_cgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_zgemm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
-    void (*row_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    void (*row_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::int64_t k, sycl::half alpha,
                                                sycl::buffer<sycl::half, 1>& a, std::int64_t lda,
                                                std::int64_t stride_a,
@@ -3326,242 +3323,246 @@ typedef struct {
                                                sycl::buffer<sycl::half, 1>& c, std::int64_t ldc,
                                                std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_gemm_f16f16f32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer<sycl::half, 1>& a,
         std::int64_t lda, std::int64_t stride_a, sycl::buffer<sycl::half, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, float beta, sycl::buffer<float, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_gemm_s8s8f32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
         sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*row_major_gemm_s8s8s32_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<std::int8_t, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<std::int8_t, 1>& b, std::int64_t ldb, std::int64_t stride_b, float beta,
         sycl::buffer<std::int32_t, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*row_major_strsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         float alpha, sycl::buffer<float, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<float, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*row_major_dtrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         double alpha, sycl::buffer<double, 1>& a, std::int64_t lda, std::int64_t stride_a,
         sycl::buffer<double, 1>& b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size);
     void (*row_major_ctrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     void (*row_major_ztrsm_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
-    void (*row_major_sgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*row_major_sgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose transa, oneapi::math::transpose transb,
                                   std::int64_t n, std::int64_t k, float alpha,
                                   sycl::buffer<float, 1>& a, std::int64_t lda,
                                   sycl::buffer<float, 1>& b, std::int64_t ldb, float beta,
                                   sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_dgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*row_major_dgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose transa, oneapi::math::transpose transb,
                                   std::int64_t n, std::int64_t k, double alpha,
                                   sycl::buffer<double, 1>& a, std::int64_t lda,
                                   sycl::buffer<double, 1>& b, std::int64_t ldb, double beta,
                                   sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_cgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*row_major_cgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose transa, oneapi::math::transpose transb,
                                   std::int64_t n, std::int64_t k, std::complex<float> alpha,
                                   sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                   std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& c,
                                   std::int64_t ldc);
-    void (*row_major_zgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                  oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+    void (*row_major_zgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                  oneapi::math::transpose transa, oneapi::math::transpose transb,
                                   std::int64_t n, std::int64_t k, std::complex<double> alpha,
                                   sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                   sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                   std::complex<double> beta,
                                   sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc);
     void (*row_major_gemm_s8u8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<uint8_t, 1>& b,
         std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*row_major_gemm_s8s8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<int8_t, 1>& a, std::int64_t lda, int8_t ao, sycl::buffer<int8_t, 1>& b,
         std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*row_major_gemm_u8s8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao, sycl::buffer<int8_t, 1>& b,
         std::int64_t ldb, int8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
     void (*row_major_gemm_u8u8s32_bias_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         sycl::buffer<uint8_t, 1>& a, std::int64_t lda, uint8_t ao, sycl::buffer<uint8_t, 1>& b,
         std::int64_t ldb, uint8_t bo, float beta, sycl::buffer<int32_t, 1>& c, std::int64_t ldc,
         sycl::buffer<int32_t, 1>& co);
-    void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, float alpha,
+    void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, float alpha,
                                                    sycl::buffer<float, 1>& a, std::int64_t lda,
                                                    std::int64_t stride_a, sycl::buffer<float, 1>& b,
                                                    std::int64_t ldb, std::int64_t stride_b,
                                                    std::int64_t batch_size);
-    void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, double alpha,
+    void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, double alpha,
                                                    sycl::buffer<double, 1>& a, std::int64_t lda,
                                                    std::int64_t stride_a,
                                                    sycl::buffer<double, 1>& b, std::int64_t ldb,
                                                    std::int64_t stride_b, std::int64_t batch_size);
     void (*row_major_comatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     void (*row_major_zomatcopy_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
         std::int64_t stride_a, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
-    void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, float alpha,
+    void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, float alpha,
                                                    sycl::buffer<float, 1>& ab, std::int64_t lda,
                                                    std::int64_t ldb, std::int64_t stride,
                                                    std::int64_t batch_size);
-    void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n, double alpha,
+    void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, double alpha,
                                                    sycl::buffer<double, 1>& ab, std::int64_t lda,
                                                    std::int64_t ldb, std::int64_t stride,
                                                    std::int64_t batch_size);
-    void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<float> alpha,
+    void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<float> alpha,
                                                    sycl::buffer<std::complex<float>, 1>& ab,
                                                    std::int64_t lda, std::int64_t ldb,
                                                    std::int64_t stride, std::int64_t batch_size);
-    void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                                   std::int64_t m, std::int64_t n,
-                                                   std::complex<double> alpha,
+    void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue,
+                                                   oneapi::math::transpose trans, std::int64_t m,
+                                                   std::int64_t n, std::complex<double> alpha,
                                                    sycl::buffer<std::complex<double>, 1>& ab,
                                                    std::int64_t lda, std::int64_t ldb,
                                                    std::int64_t stride, std::int64_t batch_size);
     void (*row_major_somatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
         std::int64_t stride_a, float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<float, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*row_major_domatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
         std::int64_t stride_a, double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<double, 1>& c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size);
     void (*row_major_comatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha,
         sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<float> beta, sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
     void (*row_major_zomatadd_batch_strided_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<double> alpha,
         sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda, std::int64_t stride_a,
         std::complex<double> beta, sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
         std::int64_t stride_b, sycl::buffer<std::complex<double>, 1>& c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size);
 
-    void (*row_major_somatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_somatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, float alpha,
                                      sycl::buffer<float, 1>& a, std::int64_t lda,
                                      sycl::buffer<float, 1>& b, std::int64_t ldb);
-    void (*row_major_domatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_domatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, double alpha,
                                      sycl::buffer<double, 1>& a, std::int64_t lda,
                                      sycl::buffer<double, 1>& b, std::int64_t ldb);
-    void (*row_major_comatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_comatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                      sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb);
-    void (*row_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                      sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                      sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb);
-    void (*row_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t m, std::int64_t n, float alpha,
                                       sycl::buffer<float, 1>& a, std::int64_t lda,
                                       std::int64_t stridea, sycl::buffer<float, 1>& b,
                                       std::int64_t ldb, std::int64_t strideb);
-    void (*row_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t m, std::int64_t n, double alpha,
                                       sycl::buffer<double, 1>& a, std::int64_t lda,
                                       std::int64_t stridea, sycl::buffer<double, 1>& b,
                                       std::int64_t ldb, std::int64_t strideb);
-    void (*row_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                       sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                       std::int64_t stridea, sycl::buffer<std::complex<float>, 1>& b,
                                       std::int64_t ldb, std::int64_t strideb);
-    void (*row_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                       std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                       sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                       std::int64_t stridea,
                                       sycl::buffer<std::complex<double>, 1>& b, std::int64_t ldb,
                                       std::int64_t strideb);
-    void (*row_major_simatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_simatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, float alpha,
                                      sycl::buffer<float, 1>& ab, std::int64_t lda,
                                      std::int64_t ldb);
-    void (*row_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, double alpha,
                                      sycl::buffer<double, 1>& ab, std::int64_t lda,
                                      std::int64_t ldb);
-    void (*row_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, std::complex<float> alpha,
                                      sycl::buffer<std::complex<float>, 1>& ab, std::int64_t lda,
                                      std::int64_t ldb);
-    void (*row_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    void (*row_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                      std::int64_t m, std::int64_t n, std::complex<double> alpha,
                                      sycl::buffer<std::complex<double>, 1>& ab, std::int64_t lda,
                                      std::int64_t ldb);
-    void (*row_major_somatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_somatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     float alpha, sycl::buffer<float, 1>& a, std::int64_t lda,
                                     float beta, sycl::buffer<float, 1>& b, std::int64_t ldb,
                                     sycl::buffer<float, 1>& c, std::int64_t ldc);
-    void (*row_major_domatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_domatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     double alpha, sycl::buffer<double, 1>& a, std::int64_t lda,
                                     double beta, sycl::buffer<double, 1>& b, std::int64_t ldb,
                                     sycl::buffer<double, 1>& c, std::int64_t ldc);
-    void (*row_major_comatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_comatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::complex<float> alpha,
                                     sycl::buffer<std::complex<float>, 1>& a, std::int64_t lda,
                                     std::complex<float> beta,
                                     sycl::buffer<std::complex<float>, 1>& b, std::int64_t ldb,
                                     sycl::buffer<std::complex<float>, 1>& c, std::int64_t ldc);
-    void (*row_major_zomatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                    oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n,
+    void (*row_major_zomatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                    oneapi::math::transpose transb, std::int64_t m, std::int64_t n,
                                     std::complex<double> alpha,
                                     sycl::buffer<std::complex<double>, 1>& a, std::int64_t lda,
                                     std::complex<double> beta,
@@ -3858,24 +3859,24 @@ typedef struct {
                                             std::complex<double>* x, std::int64_t incx,
                                             std::complex<double>* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n, std::int64_t kl,
                                             std::int64_t ku, float alpha, const float* a,
                                             std::int64_t lda, const float* x, std::int64_t incx,
                                             float beta, float* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n, std::int64_t kl,
                                             std::int64_t ku, double alpha, const double* a,
                                             std::int64_t lda, const double* x, std::int64_t incx,
                                             double beta, double* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgbmv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::int64_t kl, std::int64_t ku, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* x, std::int64_t incx, std::complex<float> beta,
         std::complex<float>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n, std::int64_t kl,
                                             std::int64_t ku, std::complex<double> alpha,
                                             const std::complex<double>* a, std::int64_t lda,
@@ -3883,19 +3884,19 @@ typedef struct {
                                             std::complex<double> beta, std::complex<double>* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n, float alpha,
                                             const float* a, std::int64_t lda, const float* x,
                                             std::int64_t incx, float beta, float* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n, double alpha,
                                             const double* a, std::int64_t lda, const double* x,
                                             std::int64_t incx, double beta, double* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                             std::int64_t m, std::int64_t n,
                                             std::complex<float> alpha, const std::complex<float>* a,
                                             std::int64_t lda, const std::complex<float>* x,
@@ -3903,96 +3904,96 @@ typedef struct {
                                             std::complex<float>* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
         std::complex<double>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x,
         std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x,
         std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stridea, const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<float> beta, std::complex<float>* y, std::int64_t incy, std::int64_t stridey,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemv_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stridea, const std::complex<double>* x, std::int64_t incx,
         std::int64_t stridex, std::complex<double> beta, std::complex<double>* y, std::int64_t incy,
         std::int64_t stridey, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx,
         float* beta, float** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx,
         double* beta, double** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
         const std::complex<float>** x, std::int64_t* incx, std::complex<float>* beta,
         std::complex<float>** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemv_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, const std::complex<double>** a, std::int64_t* lda,
         const std::complex<double>** x, std::int64_t* incx, std::complex<double>* beta,
         std::complex<double>** y, std::int64_t* incy, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx,
         std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ddgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx,
         std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const std::complex<float>* a, std::int64_t lda, std::int64_t stridea,
         const std::complex<float>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<float>* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zdgmm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n,
         const std::complex<double>* a, std::int64_t lda, std::int64_t stridea,
         const std::complex<double>* x, std::int64_t incx, std::int64_t stridex,
         std::complex<double>* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ddgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const std::complex<float>** a, std::int64_t* lda, const std::complex<float>** x,
         std::int64_t* incx, std::complex<float>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zdgmm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n,
         const std::complex<double>** a, std::int64_t* lda, const std::complex<double>** x,
         std::int64_t* incx, std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
@@ -4030,7 +4031,7 @@ typedef struct {
                                             const std::complex<double>* y, std::int64_t incy,
                                             std::complex<double>* a, std::int64_t lda,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::int64_t k,
                                             std::complex<float> alpha, const std::complex<float>* a,
                                             std::int64_t lda, const std::complex<float>* x,
@@ -4038,309 +4039,309 @@ typedef struct {
                                             std::complex<float>* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zhbmv_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         const std::complex<double>* x, std::int64_t incx, std::complex<double> beta,
         std::complex<double>* y, std::int64_t incy, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<float> alpha,
                                             const std::complex<float>* a, std::int64_t lda,
                                             const std::complex<float>* x, std::int64_t incx,
                                             std::complex<float> beta, std::complex<float>* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* a, std::int64_t lda,
                                             const std::complex<double>* x, std::int64_t incx,
                                             std::complex<double> beta, std::complex<double>* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_cher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, float alpha,
                                            const std::complex<float>* x, std::int64_t incx,
                                            std::complex<float>* a, std::int64_t lda,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, double alpha,
                                            const std::complex<double>* x, std::int64_t incx,
                                            std::complex<double>* a, std::int64_t lda,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<float> alpha,
                                             const std::complex<float>* x, std::int64_t incx,
                                             const std::complex<float>* y, std::int64_t incy,
                                             std::complex<float>* a, std::int64_t lda,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* x, std::int64_t incx,
                                             const std::complex<double>* y, std::int64_t incy,
                                             std::complex<double>* a, std::int64_t lda,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<float> alpha,
                                             const std::complex<float>* a,
                                             const std::complex<float>* x, std::int64_t incx,
                                             std::complex<float> beta, std::complex<float>* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* a,
                                             const std::complex<double>* x, std::int64_t incx,
                                             std::complex<double> beta, std::complex<double>* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, float alpha,
                                            const std::complex<float>* x, std::int64_t incx,
                                            std::complex<float>* a,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, double alpha,
                                            const std::complex<double>* x, std::int64_t incx,
                                            std::complex<double>* a,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<float> alpha,
                                             const std::complex<float>* x, std::int64_t incx,
                                             const std::complex<float>* y, std::int64_t incy,
                                             std::complex<float>* a,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* x, std::int64_t incx,
                                             const std::complex<double>* y, std::int64_t incy,
                                             std::complex<double>* a,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::int64_t k, float alpha,
                                             const float* a, std::int64_t lda, const float* x,
                                             std::int64_t incx, float beta, float* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, std::int64_t k, double alpha,
                                             const double* a, std::int64_t lda, const double* x,
                                             std::int64_t incx, double beta, double* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, float alpha, const float* a,
                                             const float* x, std::int64_t incx, float beta, float* y,
                                             std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, double alpha, const double* a,
                                             const double* x, std::int64_t incx, double beta,
                                             double* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, float alpha, const float* x,
                                            std::int64_t incx, float* a,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, double alpha, const double* x,
                                            std::int64_t incx, double* a,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, float alpha, const float* x,
                                             std::int64_t incx, const float* y, std::int64_t incy,
                                             float* a, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, double alpha, const double* x,
                                             std::int64_t incx, const double* y, std::int64_t incy,
                                             double* a,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, float alpha, const float* a,
                                             std::int64_t lda, const float* x, std::int64_t incx,
                                             float beta, float* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, double alpha, const double* a,
                                             std::int64_t lda, const double* x, std::int64_t incx,
                                             double beta, double* y, std::int64_t incy,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, float alpha, const float* x,
                                            std::int64_t incx, float* a, std::int64_t lda,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                            std::int64_t n, double alpha, const double* x,
                                            std::int64_t incx, double* a, std::int64_t lda,
                                            const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, float alpha, const float* x,
                                             std::int64_t incx, const float* y, std::int64_t incy,
                                             float* a, std::int64_t lda,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
+    sycl::event (*row_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
                                             std::int64_t n, double alpha, const double* x,
                                             std::int64_t incx, const double* y, std::int64_t incy,
                                             double* a, std::int64_t lda,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const float* a, std::int64_t lda,
                                             float* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const double* a, std::int64_t lda,
                                             double* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const std::complex<float>* a,
                                             std::int64_t lda, std::complex<float>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const std::complex<double>* a,
                                             std::int64_t lda, std::complex<double>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const float* a, std::int64_t lda,
                                             float* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const double* a, std::int64_t lda,
                                             double* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const std::complex<float>* a,
                                             std::int64_t lda, std::complex<float>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             std::int64_t k, const std::complex<double>* a,
                                             std::int64_t lda, std::complex<double>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const float* a, float* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const double* a, double* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<float>* a, std::complex<float>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<double>* a, std::complex<double>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const float* a, float* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const double* a, double* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<float>* a, std::complex<float>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<double>* a, std::complex<double>* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const float* a, std::int64_t lda, float* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const double* a, std::int64_t lda, double* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<float>* a, std::int64_t lda,
                                             std::complex<float>* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<double>* a, std::int64_t lda,
                                             std::complex<double>* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const float* a, std::int64_t lda, float* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const double* a, std::int64_t lda, double* x,
                                             std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<float>* a, std::int64_t lda,
                                             std::complex<float>* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t n,
+    sycl::event (*row_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t n,
                                             const std::complex<double>* a, std::int64_t lda,
                                             std::complex<double>* x, std::int64_t incx,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                            oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                            oneapi::math::transpose transb, std::int64_t m,
                                             std::int64_t n, std::int64_t k, float alpha,
                                             const float* a, std::int64_t lda, const float* b,
                                             std::int64_t ldb, float beta, float* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                            oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                            oneapi::math::transpose transb, std::int64_t m,
                                             std::int64_t n, std::int64_t k, double alpha,
                                             const double* a, std::int64_t lda, const double* b,
                                             std::int64_t ldb, double beta, double* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                            oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                            oneapi::math::transpose transb, std::int64_t m,
                                             std::int64_t n, std::int64_t k,
                                             std::complex<float> alpha, const std::complex<float>* a,
                                             std::int64_t lda, const std::complex<float>* b,
@@ -4348,582 +4349,583 @@ typedef struct {
                                             std::complex<float>* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
         std::int64_t ldb, std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                            oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                            oneapi::math::transpose transb, std::int64_t m,
                                             std::int64_t n, std::int64_t k, sycl::half alpha,
                                             const sycl::half* a, std::int64_t lda,
                                             const sycl::half* b, std::int64_t ldb, sycl::half beta,
                                             sycl::half* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_f16f16f32_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
         std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c,
         std::int64_t ldc, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_bf16bf16f32_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16* a,
-        std::int64_t lda, const oneapi::mkl::bfloat16* b, std::int64_t ldb, float beta, float* c,
-        std::int64_t ldc, const std::vector<sycl::event>& dependencies);
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b,
+        std::int64_t ldb, float beta, float* c, std::int64_t ldc,
+        const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_chemm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower, std::int64_t m,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* a, std::int64_t lda,
                                             const std::complex<double>* b, std::int64_t ldb,
                                             std::complex<double> beta, std::complex<double>* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, float alpha,
                                             const std::complex<float>* a, std::int64_t lda,
                                             float beta, std::complex<float>* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, double alpha,
                                             const std::complex<double>* a, std::int64_t lda,
                                             double beta, std::complex<double>* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose trans, std::int64_t n,
                                              std::int64_t k, std::complex<float> alpha,
                                              const std::complex<float>* a, std::int64_t lda,
                                              const std::complex<float>* b, std::int64_t ldb,
                                              float beta, std::complex<float>* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose trans, std::int64_t n,
                                              std::int64_t k, std::complex<double> alpha,
                                              const std::complex<double>* a, std::int64_t lda,
                                              const std::complex<double>* b, std::int64_t ldb,
                                              double beta, std::complex<double>* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower, std::int64_t m,
                                             std::int64_t n, float alpha, const float* a,
                                             std::int64_t lda, const float* b, std::int64_t ldb,
                                             float beta, float* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower, std::int64_t m,
                                             std::int64_t n, double alpha, const double* a,
                                             std::int64_t lda, const double* b, std::int64_t ldb,
                                             double beta, double* c, std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_csymm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower, std::int64_t m,
+    sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower, std::int64_t m,
                                             std::int64_t n, std::complex<double> alpha,
                                             const std::complex<double>* a, std::int64_t lda,
                                             const std::complex<double>* b, std::int64_t ldb,
                                             std::complex<double> beta, std::complex<double>* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, float alpha, const float* a,
                                             std::int64_t lda, float beta, float* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, double alpha, const double* a,
                                             std::int64_t lda, double beta, double* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, std::complex<float> alpha,
                                             const std::complex<float>* a, std::int64_t lda,
                                             std::complex<float> beta, std::complex<float>* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans, std::int64_t n,
                                             std::int64_t k, std::complex<double> alpha,
                                             const std::complex<double>* a, std::int64_t lda,
                                             std::complex<double> beta, std::complex<double>* c,
                                             std::int64_t ldc,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ssyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda,
         float* beta, float** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dsyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda,
         double* beta, double** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_csyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, std::complex<float>* alpha, const std::complex<float>** a,
         std::int64_t* lda, std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zsyrk_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans,
+        sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans,
         std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
         const std::complex<double>** a, std::int64_t* lda, std::complex<double>* beta,
         std::complex<double>** c, std::int64_t* ldc, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ssyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda,
         std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dsyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda,
         std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_csyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<float> beta, std::complex<float>* c,
         std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zsyrk_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<double> alpha, const std::complex<double>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<double> beta, std::complex<double>* c,
         std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose trans, std::int64_t n,
                                              std::int64_t k, float alpha, const float* a,
                                              std::int64_t lda, const float* b, std::int64_t ldb,
                                              float beta, float* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose trans, std::int64_t n,
                                              std::int64_t k, double alpha, const double* a,
                                              std::int64_t lda, const double* b, std::int64_t ldb,
                                              double beta, double* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_csyr2k_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans,
         std::int64_t n, std::int64_t k, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, const std::complex<float>* b, std::int64_t ldb, std::complex<float> beta,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose trans, std::int64_t n,
+    sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose trans, std::int64_t n,
                                              std::int64_t k, std::complex<double> alpha,
                                              const std::complex<double>* a, std::int64_t lda,
                                              const std::complex<double>* b, std::int64_t ldb,
                                              std::complex<double> beta, std::complex<double>* c,
                                              std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*row_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t m,
                                             std::int64_t n, float alpha, const float* a,
                                             std::int64_t lda, float* b, std::int64_t ldb,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t m,
                                             std::int64_t n, double alpha, const double* a,
                                             std::int64_t lda, double* b, std::int64_t ldb,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ctrmm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::complex<float>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ztrmm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::complex<double>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*row_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t m,
                                             std::int64_t n, float alpha, const float* a,
                                             std::int64_t lda, float* b, std::int64_t ldb,
                                             const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right,
-                                            oneapi::mkl::uplo upper_lower,
-                                            oneapi::mkl::transpose trans,
-                                            oneapi::mkl::diag unit_diag, std::int64_t m,
+    sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right,
+                                            oneapi::math::uplo upper_lower,
+                                            oneapi::math::transpose trans,
+                                            oneapi::math::diag unit_diag, std::int64_t m,
                                             std::int64_t n, double alpha, const double* a,
                                             std::int64_t lda, double* b, std::int64_t ldb,
                                             const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ctrsm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::complex<float>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ztrsm_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::complex<double>* b, std::int64_t ldb, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_strsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dtrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ctrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ztrsm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower,
-        oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower,
+        oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_strsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dtrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ctrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, std::complex<float>* alpha, const std::complex<float>** a,
         std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_ztrsm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower,
-        oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m,
+        sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower,
+        oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m,
         std::int64_t* n, std::complex<double>* alpha, const std::complex<double>** a,
         std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* group_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a,
         std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a,
         std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex<float>* alpha,
         const std::complex<float>** a, std::int64_t* lda, const std::complex<float>** b,
         std::int64_t* ldb, std::complex<float>* beta, std::complex<float>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex<double>* alpha,
         const std::complex<double>** a, std::int64_t* lda, const std::complex<double>** b,
         std::int64_t* ldb, std::complex<double>* beta, std::complex<double>** c, std::int64_t* ldc,
         std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_hgemm_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a,
         std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta,
         sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_f16f16f32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a,
         std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8s8f32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a,
         std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8s8s32_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb,
+        sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb,
         std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a,
         std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c,
         std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_sgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a,
         std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a,
         std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb,
         std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         const std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
         const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float> beta, std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
         const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double> beta, std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_hgemm_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a,
         std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
         std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc,
         std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_f16f16f32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a,
         std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8s8f32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
         std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8s8s32_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a,
         std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb,
         std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose transa,
-                                             oneapi::mkl::transpose transb, std::int64_t n,
+    sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose transa,
+                                             oneapi::math::transpose transb, std::int64_t n,
                                              std::int64_t k, float alpha, const float* a,
                                              std::int64_t lda, const float* b, std::int64_t ldb,
                                              float beta, float* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower,
-                                             oneapi::mkl::transpose transa,
-                                             oneapi::mkl::transpose transb, std::int64_t n,
+    sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower,
+                                             oneapi::math::transpose transa,
+                                             oneapi::math::transpose transb, std::int64_t n,
                                              std::int64_t k, double alpha, const double* a,
                                              std::int64_t lda, const double* b, std::int64_t ldb,
                                              double beta, double* c, std::int64_t ldc,
                                              const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cgemmt_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-        oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+        oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex<float> alpha,
         const std::complex<float>* a, std::int64_t lda, const std::complex<float>* b,
         std::int64_t ldb, std::complex<float> beta, std::complex<float>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zgemmt_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-        oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
+        sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+        oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex<double> alpha,
         const std::complex<double>* a, std::int64_t lda, const std::complex<double>* b,
         std::int64_t ldb, std::complex<double> beta, std::complex<double>* c, std::int64_t ldc,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8u8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b,
         std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_s8s8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b,
         std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_u8s8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b,
         std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_gemm_u8u8s32_bias_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
-        oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
+        oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha,
         const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b,
         std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc,
         const std::int32_t* co, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_somatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_domatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b,
         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_comatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, const std::complex<float>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zomatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, const std::complex<double>* a, std::int64_t lda,
         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_simatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<float> alpha, std::complex<float>* ab, std::int64_t lda, std::int64_t ldb,
         std::int64_t stride, std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zimatcopy_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
         std::complex<double> alpha, std::complex<double>* ab, std::int64_t lda, std::int64_t ldb,
         std::int64_t stride, std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_somatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda,
         std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b,
         float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_domatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda,
         std::int64_t stride_a, double beta, const double* b, std::int64_t ldb,
         std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c,
         std::int64_t batch_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_comatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
         const std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<float>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zomatadd_batch_strided_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<double> alpha, const std::complex<double>* a,
         std::int64_t lda, std::int64_t stride_a, std::complex<double> beta,
         const std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::complex<double>* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size,
         const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n, float alpha,
                                                 const float* a, std::int64_t lda, float* b,
                                                 std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n, double alpha,
                                                 const double* a, std::int64_t lda, double* b,
                                                 std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n,
                                                 std::complex<float> alpha,
                                                 const std::complex<float>* a, std::int64_t lda,
                                                 std::complex<float>* b, std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n,
                                                 std::complex<double> alpha,
                                                 const std::complex<double>* a, std::int64_t lda,
                                                 std::complex<double>* b, std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t m, std::int64_t n, float alpha,
                                                  const float* a, std::int64_t lda,
                                                  std::int64_t stridea, float* b, std::int64_t ldb,
                                                  std::int64_t strideb,
                                                  const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t m, std::int64_t n, double alpha,
                                                  const double* a, std::int64_t lda,
                                                  std::int64_t stridea, double* b, std::int64_t ldb,
                                                  std::int64_t strideb,
                                                  const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t m, std::int64_t n,
                                                  std::complex<float> alpha,
                                                  const std::complex<float>* a, std::int64_t lda,
                                                  std::int64_t stridea, std::complex<float>* b,
                                                  std::int64_t ldb, std::int64_t strideb,
                                                  const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t m, std::int64_t n,
                                                  std::complex<double> alpha,
                                                  const std::complex<double>* a, std::int64_t lda,
                                                  std::int64_t stridea, std::complex<double>* b,
                                                  std::int64_t ldb, std::int64_t strideb,
                                                  const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n, float alpha,
                                                 float* ab, std::int64_t lda, std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n, double alpha,
                                                 double* ab, std::int64_t lda, std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n,
                                                 std::complex<float> alpha, std::complex<float>* ab,
                                                 std::int64_t lda, std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*row_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t m, std::int64_t n,
                                                 std::complex<double> alpha,
                                                 std::complex<double>* ab, std::int64_t lda,
                                                 std::int64_t ldb,
                                                 const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, float alpha, const float* a,
                                                std::int64_t lda, float beta, const float* b,
                                                std::int64_t ldb, float* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, double alpha, const double* a,
                                                std::int64_t lda, double beta, const double* b,
                                                std::int64_t ldb, double* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_comatadd_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb,
+        sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb,
         std::int64_t m, std::int64_t n, std::complex<float> alpha, const std::complex<float>* a,
         std::int64_t lda, std::complex<float> beta, const std::complex<float>* b, std::int64_t ldb,
         std::complex<float>* c, std::int64_t ldc, const std::vector<sycl::event>& dependencies);
-    sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa,
-                                               oneapi::mkl::transpose transb, std::int64_t m,
+    sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa,
+                                               oneapi::math::transpose transb, std::int64_t m,
                                                std::int64_t n, std::complex<double> alpha,
                                                const std::complex<double>* a, std::int64_t lda,
                                                std::complex<double> beta,
@@ -4931,40 +4933,40 @@ typedef struct {
                                                std::complex<double>* c, std::int64_t ldc,
                                                const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_somatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_domatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_comatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, const std::complex<float>** a, std::int64_t* lda,
         std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zomatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, const std::complex<double>** a, std::int64_t* lda,
         std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_simatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_dimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
         std::int64_t* groupsize, const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_cimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<float>* alpha, std::complex<float>** ab, std::int64_t* lda, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*row_major_zimatcopy_batch_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n,
         std::complex<double>* alpha, std::complex<double>** ab, std::int64_t* lda,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize,
         const std::vector<sycl::event>& dependencies);
diff --git a/src/config.hpp.in b/src/config.hpp.in
index 5d8b9a136..63393e2d8 100644
--- a/src/config.hpp.in
+++ b/src/config.hpp.in
@@ -17,27 +17,27 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef ONEMKL_CONFIG_H
-#define ONEMKL_CONFIG_H
+#ifndef ONEMATH_CONFIG_H
+#define ONEMATH_CONFIG_H
 
-#cmakedefine ONEMKL_ENABLE_CUBLAS_BACKEND
-#cmakedefine ONEMKL_ENABLE_CUFFT_BACKEND
-#cmakedefine ONEMKL_ENABLE_CURAND_BACKEND
-#cmakedefine ONEMKL_ENABLE_CUSOLVER_BACKEND
-#cmakedefine ONEMKL_ENABLE_CUSPARSE_BACKEND
-#cmakedefine ONEMKL_ENABLE_MKLCPU_BACKEND
-#cmakedefine ONEMKL_ENABLE_MKLGPU_BACKEND
-#cmakedefine ONEMKL_ENABLE_NETLIB_BACKEND
-#cmakedefine ONEMKL_ENABLE_PORTBLAS_BACKEND
-#cmakedefine ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU
-#cmakedefine ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU
-#cmakedefine ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU
-#cmakedefine ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU
-#cmakedefine ONEMKL_ENABLE_PORTFFT_BACKEND
-#cmakedefine ONEMKL_ENABLE_ROCBLAS_BACKEND
-#cmakedefine ONEMKL_ENABLE_ROCFFT_BACKEND
-#cmakedefine ONEMKL_ENABLE_ROCRAND_BACKEND
-#cmakedefine ONEMKL_ENABLE_ROCSOLVER_BACKEND
-#cmakedefine ONEMKL_BUILD_SHARED_LIBS
+#cmakedefine ONEMATH_ENABLE_CUBLAS_BACKEND
+#cmakedefine ONEMATH_ENABLE_CUFFT_BACKEND
+#cmakedefine ONEMATH_ENABLE_CURAND_BACKEND
+#cmakedefine ONEMATH_ENABLE_CUSOLVER_BACKEND
+#cmakedefine ONEMATH_ENABLE_CUSPARSE_BACKEND
+#cmakedefine ONEMATH_ENABLE_MKLCPU_BACKEND
+#cmakedefine ONEMATH_ENABLE_MKLGPU_BACKEND
+#cmakedefine ONEMATH_ENABLE_NETLIB_BACKEND
+#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND
+#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU
+#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU
+#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU
+#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU
+#cmakedefine ONEMATH_ENABLE_PORTFFT_BACKEND
+#cmakedefine ONEMATH_ENABLE_ROCBLAS_BACKEND
+#cmakedefine ONEMATH_ENABLE_ROCFFT_BACKEND
+#cmakedefine ONEMATH_ENABLE_ROCRAND_BACKEND
+#cmakedefine ONEMATH_ENABLE_ROCSOLVER_BACKEND
+#cmakedefine ONEMATH_BUILD_SHARED_LIBS
 
 #endif
diff --git a/src/dft/CMakeLists.txt b/src/dft/CMakeLists.txt
index e3b373645..40987cf38 100644
--- a/src/dft/CMakeLists.txt
+++ b/src/dft/CMakeLists.txt
@@ -22,29 +22,30 @@ add_subdirectory(backends)
 
 # Recipe for DFT loader object
 if(BUILD_SHARED_LIBS)
-add_library(onemkl_dft OBJECT)
-target_sources(onemkl_dft PRIVATE backends/descriptor.cpp dft_loader.cpp)
-target_include_directories(onemkl_dft
+add_library(onemath_dft OBJECT)
+add_deprecated_library(onemath_dft)
+target_sources(onemath_dft PRIVATE backends/descriptor.cpp dft_loader.cpp)
+target_include_directories(onemath_dft
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
-          $<TARGET_FILE_DIR:onemkl>
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
+          $<TARGET_FILE_DIR:onemath>
 )
 
-target_compile_options(onemkl_dft PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(onemath_dft PRIVATE ${ONEMATH_BUILD_COPT})
 
-set_target_properties(onemkl_dft PROPERTIES
+set_target_properties(onemath_dft PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
-  add_sycl_to_target(TARGET onemkl_dft SOURCES backends/descriptor.cxx dft_loader.cpp)
+  add_sycl_to_target(TARGET onemath_dft SOURCES backends/descriptor.cxx dft_loader.cpp)
 else()
-  target_link_libraries(onemkl_dft PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(onemath_dft PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
 include(WarningsUtils)
-target_link_libraries(onemkl_dft PRIVATE onemkl_warnings)
+target_link_libraries(onemath_dft PRIVATE onemath_warnings)
 
 endif()
diff --git a/src/dft/backends/CMakeLists.txt b/src/dft/backends/CMakeLists.txt
index b03a63e8a..262bfe3cc 100644
--- a/src/dft/backends/CMakeLists.txt
+++ b/src/dft/backends/CMakeLists.txt
@@ -17,8 +17,8 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-add_custom_target(onemkl_backend_libs_dft)
-add_dependencies(onemkl_backend_libs onemkl_backend_libs_dft)
+add_custom_target(onemath_backend_libs_dft)
+add_dependencies(onemath_backend_libs onemath_backend_libs_dft)
 
 if(ENABLE_MKLGPU_BACKEND)
   add_subdirectory(mklgpu)
diff --git a/src/dft/backends/backend_backward_instantiations.cxx b/src/dft/backends/backend_backward_instantiations.cxx
index e4d960afb..e475ff55c 100644
--- a/src/dft/backends/backend_backward_instantiations.cxx
+++ b/src/dft/backends/backend_backward_instantiations.cxx
@@ -27,32 +27,32 @@ using desc_cd_t =
     dft::detail::descriptor<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>;
 using depends_vec_t = const std::vector<sycl::event>&;
 
-#define ONEMKL_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T)         \
-    /* Buffer API */                                                                              \
-    template ONEMKL_EXPORT void compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&,                     \
-                                                               sycl::buffer<FORWARD_T>&);         \
-    template ONEMKL_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
-        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&);                         \
-    template ONEMKL_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
-        DESCRIPTOR_T&, sycl::buffer<BACKWARD_T>&, sycl::buffer<FORWARD_T>&);                      \
-    template ONEMKL_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
-        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, \
-        sycl::buffer<SCALAR_T>&);                                                                 \
-                                                                                                  \
-    /* USM API */                                                                                 \
-    template ONEMKL_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,  \
-                                                                      depends_vec_t);             \
-    template ONEMKL_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, SCALAR_T*,   \
-                                                                      SCALAR_T*, depends_vec_t);  \
-    template ONEMKL_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, BACKWARD_T*, \
-                                                                      FORWARD_T*, depends_vec_t); \
-    template ONEMKL_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(                            \
+#define ONEMATH_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T)         \
+    /* Buffer API */                                                                               \
+    template ONEMATH_EXPORT void compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&,                     \
+                                                                sycl::buffer<FORWARD_T>&);         \
+    template ONEMATH_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
+        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&);                          \
+    template ONEMATH_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
+        DESCRIPTOR_T&, sycl::buffer<BACKWARD_T>&, sycl::buffer<FORWARD_T>&);                       \
+    template ONEMATH_EXPORT void compute_backward<DESCRIPTOR_T>(                                   \
+        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&,  \
+        sycl::buffer<SCALAR_T>&);                                                                  \
+                                                                                                   \
+    /* USM API */                                                                                  \
+    template ONEMATH_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,  \
+                                                                       depends_vec_t);             \
+    template ONEMATH_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, SCALAR_T*,   \
+                                                                       SCALAR_T*, depends_vec_t);  \
+    template ONEMATH_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(DESCRIPTOR_T&, BACKWARD_T*, \
+                                                                       FORWARD_T*, depends_vec_t); \
+    template ONEMATH_EXPORT sycl::event compute_backward<DESCRIPTOR_T>(                            \
         DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t);
 
-ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex<float>)
-ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_cf_t, float, std::complex<float>, std::complex<float>)
-ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_rd_t, double, double, std::complex<double>)
-ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_cd_t, double, std::complex<double>, std::complex<double>)
+ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex<float>)
+ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_cf_t, float, std::complex<float>, std::complex<float>)
+ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_rd_t, double, double, std::complex<double>)
+ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_cd_t, double, std::complex<double>, std::complex<double>)
 
-#undef ONEMKL_DFT_BACKWARD_INSTANTIATIONS
-#undef ONEMKL_DFT_BACKWARD_INSTANTIATIONS_REAL_ONLY
+#undef ONEMATH_DFT_BACKWARD_INSTANTIATIONS
+#undef ONEMATH_DFT_BACKWARD_INSTANTIATIONS_REAL_ONLY
diff --git a/src/dft/backends/backend_compute_signature.cxx b/src/dft/backends/backend_compute_signature.cxx
index d011cb995..71fcd793a 100644
--- a/src/dft/backends/backend_compute_signature.cxx
+++ b/src/dft/backends/backend_compute_signature.cxx
@@ -32,12 +32,12 @@ using descriptor_type = typename dft::detail::descriptor<prec, dom>;
 void forward_ip_cc(descriptor_type& desc, sycl::buffer<fwd_type, 1>& inout) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<fwd_type, 1>>(
         "compute_forward");
-    oneapi::mkl::dft::BACKEND::compute_forward(desc, inout);
+    oneapi::math::dft::BACKEND::compute_forward(desc, inout);
 }
 sycl::event forward_ip_cc(descriptor_type& desc, fwd_type* inout,
                           const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<fwd_type*>("compute_forward");
-    return oneapi::mkl::dft::BACKEND::compute_forward(desc, inout, dependencies);
+    return oneapi::math::dft::BACKEND::compute_forward(desc, inout, dependencies);
 }
 
 // forward inplace REAL_REAL
@@ -45,12 +45,12 @@ void forward_ip_rr(descriptor_type& desc, sycl::buffer<scalar_type, 1>& inout_re
                    sycl::buffer<scalar_type, 1>& inout_im) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
         "compute_forward");
-    oneapi::mkl::dft::BACKEND::compute_forward(desc, inout_re, inout_im);
+    oneapi::math::dft::BACKEND::compute_forward(desc, inout_re, inout_im);
 }
 sycl::event forward_ip_rr(descriptor_type& desc, scalar_type* inout_re, scalar_type* inout_im,
                           const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_forward");
-    return oneapi::mkl::dft::BACKEND::compute_forward(desc, inout_re, inout_im, dependencies);
+    return oneapi::math::dft::BACKEND::compute_forward(desc, inout_re, inout_im, dependencies);
 }
 
 // forward out-of-place COMPLEX_COMPLEX
@@ -58,12 +58,13 @@ void forward_op_cc(descriptor_type& desc, sycl::buffer<fwd_type, 1>& in,
                    sycl::buffer<bwd_type, 1>& out) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<fwd_type, 1>>(
         "compute_forward");
-    oneapi::mkl::dft::BACKEND::compute_forward<descriptor_type>(desc, in, out);
+    oneapi::math::dft::BACKEND::compute_forward<descriptor_type>(desc, in, out);
 }
 sycl::event forward_op_cc(descriptor_type& desc, fwd_type* in, bwd_type* out,
                           const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<fwd_type*>("compute_forward");
-    return oneapi::mkl::dft::BACKEND::compute_forward<descriptor_type>(desc, in, out, dependencies);
+    return oneapi::math::dft::BACKEND::compute_forward<descriptor_type>(desc, in, out,
+                                                                        dependencies);
 }
 
 // forward out-of-place REAL_REAL
@@ -72,26 +73,26 @@ void forward_op_rr(descriptor_type& desc, sycl::buffer<scalar_type, 1>& in_re,
                    sycl::buffer<scalar_type, 1>& out_im) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
         "compute_forward");
-    oneapi::mkl::dft::BACKEND::compute_forward(desc, in_re, in_im, out_re, out_im);
+    oneapi::math::dft::BACKEND::compute_forward(desc, in_re, in_im, out_re, out_im);
 }
 sycl::event forward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_type* in_im,
                           scalar_type* out_re, scalar_type* out_im,
                           const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_forward");
-    return oneapi::mkl::dft::BACKEND::compute_forward(desc, in_re, in_im, out_re, out_im,
-                                                      dependencies);
+    return oneapi::math::dft::BACKEND::compute_forward(desc, in_re, in_im, out_re, out_im,
+                                                       dependencies);
 }
 
 // backward inplace COMPLEX_COMPLEX
 void backward_ip_cc(descriptor_type& desc, sycl::buffer<fwd_type, 1>& inout) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<fwd_type, 1>>(
         "compute_backward");
-    oneapi::mkl::dft::BACKEND::compute_backward(desc, inout);
+    oneapi::math::dft::BACKEND::compute_backward(desc, inout);
 }
 sycl::event backward_ip_cc(descriptor_type& desc, fwd_type* inout,
                            const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<fwd_type*>("compute_backward");
-    return oneapi::mkl::dft::BACKEND::compute_backward(desc, inout, dependencies);
+    return oneapi::math::dft::BACKEND::compute_backward(desc, inout, dependencies);
 }
 
 // backward inplace REAL_REAL
@@ -99,12 +100,12 @@ void backward_ip_rr(descriptor_type& desc, sycl::buffer<scalar_type, 1>& inout_r
                     sycl::buffer<scalar_type, 1>& inout_im) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
         "compute_backward");
-    oneapi::mkl::dft::BACKEND::compute_backward(desc, inout_re, inout_im);
+    oneapi::math::dft::BACKEND::compute_backward(desc, inout_re, inout_im);
 }
 sycl::event backward_ip_rr(descriptor_type& desc, scalar_type* inout_re, scalar_type* inout_im,
                            const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_backward");
-    return oneapi::mkl::dft::BACKEND::compute_backward(desc, inout_re, inout_im, dependencies);
+    return oneapi::math::dft::BACKEND::compute_backward(desc, inout_re, inout_im, dependencies);
 }
 
 // backward out-of-place COMPLEX_COMPLEX
@@ -112,12 +113,12 @@ void backward_op_cc(descriptor_type& desc, sycl::buffer<bwd_type, 1>& in,
                     sycl::buffer<fwd_type, 1>& out) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<bwd_type, 1>>(
         "compute_backward");
-    oneapi::mkl::dft::BACKEND::compute_backward(desc, in, out);
+    oneapi::math::dft::BACKEND::compute_backward(desc, in, out);
 }
 sycl::event backward_op_cc(descriptor_type& desc, bwd_type* in, fwd_type* out,
                            const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<bwd_type*>("compute_backward");
-    return oneapi::mkl::dft::BACKEND::compute_backward(desc, in, out, dependencies);
+    return oneapi::math::dft::BACKEND::compute_backward(desc, in, out, dependencies);
 }
 
 // backward out-of-place REAL_REAL
@@ -126,12 +127,12 @@ void backward_op_rr(descriptor_type& desc, sycl::buffer<scalar_type, 1>& in_re,
                     sycl::buffer<scalar_type, 1>& out_im) override {
     dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
         "compute_backward");
-    oneapi::mkl::dft::BACKEND::compute_backward(desc, in_re, in_im, out_re, out_im);
+    oneapi::math::dft::BACKEND::compute_backward(desc, in_re, in_im, out_re, out_im);
 }
 sycl::event backward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_type* in_im,
                            scalar_type* out_re, scalar_type* out_im,
                            const std::vector<sycl::event>& dependencies) override {
     dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_backward");
-    return oneapi::mkl::dft::BACKEND::compute_backward(desc, in_re, in_im, out_re, out_im,
-                                                       dependencies);
+    return oneapi::math::dft::BACKEND::compute_backward(desc, in_re, in_im, out_re, out_im,
+                                                        dependencies);
 }
diff --git a/src/dft/backends/backend_forward_instantiations.cxx b/src/dft/backends/backend_forward_instantiations.cxx
index b23a5ca40..17ac748f4 100644
--- a/src/dft/backends/backend_forward_instantiations.cxx
+++ b/src/dft/backends/backend_forward_instantiations.cxx
@@ -27,32 +27,32 @@ using desc_cd_t =
     dft::detail::descriptor<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>;
 using depends_vec_t = const std::vector<sycl::event>&;
 
-#define ONEMKL_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T)          \
-    /* Buffer API */                                                                              \
-    template ONEMKL_EXPORT void compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&,                      \
-                                                              sycl::buffer<FORWARD_T>&);          \
-    template ONEMKL_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
-        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&);                         \
-    template ONEMKL_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
-        DESCRIPTOR_T&, sycl::buffer<FORWARD_T>&, sycl::buffer<BACKWARD_T>&);                      \
-    template ONEMKL_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
-        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, \
-        sycl::buffer<SCALAR_T>&);                                                                 \
-                                                                                                  \
-    /* USM API */                                                                                 \
-    template ONEMKL_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,   \
-                                                                     depends_vec_t);              \
-    template ONEMKL_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, SCALAR_T*,    \
-                                                                     SCALAR_T*, depends_vec_t);   \
-    template ONEMKL_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,   \
-                                                                     BACKWARD_T*, depends_vec_t); \
-    template ONEMKL_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(                             \
+#define ONEMATH_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T)          \
+    /* Buffer API */                                                                               \
+    template ONEMATH_EXPORT void compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&,                      \
+                                                               sycl::buffer<FORWARD_T>&);          \
+    template ONEMATH_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
+        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&);                          \
+    template ONEMATH_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
+        DESCRIPTOR_T&, sycl::buffer<FORWARD_T>&, sycl::buffer<BACKWARD_T>&);                       \
+    template ONEMATH_EXPORT void compute_forward<DESCRIPTOR_T>(                                    \
+        DESCRIPTOR_T&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&, sycl::buffer<SCALAR_T>&,  \
+        sycl::buffer<SCALAR_T>&);                                                                  \
+                                                                                                   \
+    /* USM API */                                                                                  \
+    template ONEMATH_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,   \
+                                                                      depends_vec_t);              \
+    template ONEMATH_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, SCALAR_T*,    \
+                                                                      SCALAR_T*, depends_vec_t);   \
+    template ONEMATH_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(DESCRIPTOR_T&, FORWARD_T*,   \
+                                                                      BACKWARD_T*, depends_vec_t); \
+    template ONEMATH_EXPORT sycl::event compute_forward<DESCRIPTOR_T>(                             \
         DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t);
 
-ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex<float>)
-ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_cf_t, float, std::complex<float>, std::complex<float>)
-ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_rd_t, double, double, std::complex<double>)
-ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_cd_t, double, std::complex<double>, std::complex<double>)
+ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex<float>)
+ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_cf_t, float, std::complex<float>, std::complex<float>)
+ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_rd_t, double, double, std::complex<double>)
+ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_cd_t, double, std::complex<double>, std::complex<double>)
 
-#undef ONEMKL_DFT_FORWARD_INSTANTIATIONS
-#undef ONEMKL_DFT_FORWARD_INSTANTIATIONS_REAL_ONLY
+#undef ONEMATH_DFT_FORWARD_INSTANTIATIONS
+#undef ONEMATH_DFT_FORWARD_INSTANTIATIONS_REAL_ONLY
diff --git a/src/dft/backends/backend_wrappers.cxx b/src/dft/backends/backend_wrappers.cxx
index 5d0d2bddc..1beb8f988 100644
--- a/src/dft/backends/backend_wrappers.cxx
+++ b/src/dft/backends/backend_wrappers.cxx
@@ -26,7 +26,7 @@ To use this:
 #define WRAPPER_VERSION <Wrapper version number>
 #define BACKEND         <Backend name eg. mklgpu>
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
@@ -37,10 +37,10 @@ and backend_forward_instantiations.cxx.
 */
 
 // clang-format off
-oneapi::mkl::dft::BACKEND::create_commit,
-oneapi::mkl::dft::BACKEND::create_commit,
-oneapi::mkl::dft::BACKEND::create_commit,
-oneapi::mkl::dft::BACKEND::create_commit,
+oneapi::math::dft::BACKEND::create_commit,
+oneapi::math::dft::BACKEND::create_commit,
+oneapi::math::dft::BACKEND::create_commit,
+oneapi::math::dft::BACKEND::create_commit,
 // clang-format on
 
 #undef ONEAPI_MKL_DFT_BACKEND_SIGNATURES
diff --git a/src/dft/backends/cufft/CMakeLists.txt b/src/dft/backends/cufft/CMakeLists.txt
index 010905546..dfb28099f 100644
--- a/src/dft/backends/cufft/CMakeLists.txt
+++ b/src/dft/backends/cufft/CMakeLists.txt
@@ -17,33 +17,34 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_dft_cufft)
+set(LIB_NAME onemath_dft_cufft)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   descriptor.cpp
   commit.cpp
   forward.cpp
   backward.cpp
-  $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_cufft_wrappers.cpp>
+  $<$<BOOL:${BUILD_SHARED_LIBS}>: cufft_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_dft ${LIB_NAME})
+add_dependencies(onemath_backend_libs_dft ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_NAME}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if (${CMAKE_VERSION} VERSION_LESS "3.17.0")
   find_package(CUDA REQUIRED)
@@ -54,17 +55,17 @@ else()
   target_link_libraries(${LIB_OBJ} PRIVATE CUDA::cufft CUDA::cuda_driver)
 endif()
 
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -77,8 +78,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/dft/backends/cufft/backward.cpp b/src/dft/backends/cufft/backward.cpp
index 4c0f76a70..475f1ea49 100644
--- a/src/dft/backends/cufft/backward.cpp
+++ b/src/dft/backends/cufft/backward.cpp
@@ -23,18 +23,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "execute_helper.hpp"
 #include "../../execute_helper_generic.hpp"
 
 #include <cufft.h>
 
-namespace oneapi::mkl::dft::cufft {
+namespace oneapi::math::dft::cufft {
 namespace detail {
 //forward declaration
 template <dft::precision prec, dft::domain dom>
@@ -49,8 +49,8 @@ cufftHandle get_bwd_plan(dft::detail::commit_impl<prec, dom>* commit) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     const std::string func_name = "compute_backward(desc, inout)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -62,7 +62,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         offsets[0] *= 2; // offset is supplied in complex but we offset scalar pointer
         if (offsets[1] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -86,17 +86,17 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
-                                    sycl::buffer<scalar<descriptor_type>, 1>&) {
-    throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT void compute_backward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
+                                     sycl::buffer<scalar<descriptor_type>, 1>&) {
+    throw oneapi::math::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<bwd<descriptor_type>, 1>& in,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<bwd<descriptor_type>, 1>& in,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& out) {
     const std::string func_name = "compute_backward(desc, in, out)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
@@ -107,7 +107,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[1] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -137,20 +137,20 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
-                                    sycl::buffer<scalar<descriptor_type>, 1>&,
-                                    sycl::buffer<scalar<descriptor_type>, 1>&,
-                                    sycl::buffer<scalar<descriptor_type>, 1>&) {
-    throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, in_re, in_im, out_re, out_im)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT void compute_backward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
+                                     sycl::buffer<scalar<descriptor_type>, 1>&,
+                                     sycl::buffer<scalar<descriptor_type>, 1>&,
+                                     sycl::buffer<scalar<descriptor_type>, 1>&) {
+    throw oneapi::math::unimplemented("DFT", "compute_backward(desc, in_re, in_im, out_re, out_im)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //USM version
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                            const std::vector<sycl::event>& dependencies) {
     const std::string func_name = "compute_backward(desc, inout, dependencies)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -162,7 +162,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         offsets[0] *= 2; // offset is supplied in complex but we offset scalar pointer
         if (offsets[1] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -185,19 +185,19 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type&, scalar<descriptor_type>*,
-                                           scalar<descriptor_type>*,
-                                           const std::vector<sycl::event>&) {
-    throw oneapi::mkl::unimplemented("DFT",
-                                     "compute_backward(desc, inout_re, inout_im, dependencies)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type&, scalar<descriptor_type>*,
+                                            scalar<descriptor_type>*,
+                                            const std::vector<sycl::event>&) {
+    throw oneapi::math::unimplemented("DFT",
+                                      "compute_backward(desc, inout_re, inout_im, dependencies)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
-                                           fwd<descriptor_type>* out,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
+                                            fwd<descriptor_type>* out,
+                                            const std::vector<sycl::event>& dependencies) {
     const std::string func_name = "compute_backward(desc, in, out, dependencies)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
@@ -208,7 +208,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[1] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -231,16 +231,16 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type&, scalar<descriptor_type>*,
-                                           scalar<descriptor_type>*, scalar<descriptor_type>*,
-                                           scalar<descriptor_type>*,
-                                           const std::vector<sycl::event>&) {
-    throw oneapi::mkl::unimplemented("DFT",
-                                     "compute_backward(desc, in_re, in_im, out_re, out_im, deps)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type&, scalar<descriptor_type>*,
+                                            scalar<descriptor_type>*, scalar<descriptor_type>*,
+                                            scalar<descriptor_type>*,
+                                            const std::vector<sycl::event>&) {
+    throw oneapi::math::unimplemented("DFT",
+                                      "compute_backward(desc, in_re, in_im, out_re, out_im, deps)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 // Template function instantiations
 #include "dft/backends/backend_backward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::cufft
+} // namespace oneapi::math::dft::cufft
diff --git a/src/dft/backends/cufft/commit.cpp b/src/dft/backends/cufft/commit.cpp
index 8dd9b225b..b6d2164ff 100644
--- a/src/dft/backends/cufft/commit.cpp
+++ b/src/dft/backends/cufft/commit.cpp
@@ -27,19 +27,19 @@
 #include <algorithm>
 #include <optional>
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "../stride_helper.hpp"
 
 #include <cufft.h>
 #include <cuda.h>
 
-namespace oneapi::mkl::dft::cufft {
+namespace oneapi::math::dft::cufft {
 namespace detail {
 
 /// Commit impl class specialization for cuFFT.
@@ -49,18 +49,18 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
     using scalar_type = typename dft::detail::commit_impl<prec, dom>::scalar_type;
 
     // For real to complex transforms, the "type" arg also encodes the direction (e.g. CUFFT_R2C vs CUFFT_C2R) in the plan so we must have one for each direction.
-    // We also need this because oneMKL uses a directionless "FWD_DISTANCE" and "BWD_DISTANCE" while cuFFT uses a directional "idist" and "odist".
+    // We also need this because oneMath uses a directionless "FWD_DISTANCE" and "BWD_DISTANCE" while cuFFT uses a directional "idist" and "odist".
     // plans[0] is forward, plans[1] is backward
     std::array<std::optional<cufftHandle>, 2> plans = { std::nullopt, std::nullopt };
     std::int64_t offset_fwd_in, offset_fwd_out, offset_bwd_in, offset_bwd_out;
 
 public:
     cufft_commit(sycl::queue& queue, const dft::detail::dft_values<prec, dom>& config_values)
-            : oneapi::mkl::dft::detail::commit_impl<prec, dom>(queue, backend::cufft,
-                                                               config_values) {
+            : oneapi::math::dft::detail::commit_impl<prec, dom>(queue, backend::cufft,
+                                                                config_values) {
         if constexpr (prec == dft::detail::precision::DOUBLE) {
             if (!queue.get_device().has(sycl::aspect::fp64)) {
-                throw mkl::exception("DFT", "commit", "Device does not support double precision.");
+                throw math::exception("DFT", "commit", "Device does not support double precision.");
             }
         }
     }
@@ -69,15 +69,15 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
         auto fix_context = plans[0].has_value() || plans[1].has_value();
         if (plans[0]) {
             if (cufftDestroy(plans[0].value()) != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__,
-                                     "Failed to destroy forward cuFFT plan.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__,
+                                      "Failed to destroy forward cuFFT plan.");
             }
             plans[0] = std::nullopt;
         }
         if (plans[1]) {
             if (cufftDestroy(plans[1].value()) != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__,
-                                     "Failed to destroy backward cuFFT plan.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__,
+                                      "Failed to destroy backward cuFFT plan.");
             }
             plans[1] = std::nullopt;
         }
@@ -87,8 +87,8 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
                 sycl::get_native<sycl::backend::ext_oneapi_cuda>(this->get_queue().get_device());
             CUcontext interopContext;
             if (cuDevicePrimaryCtxRetain(&interopContext, interopDevice) != CUDA_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__,
-                                     "Failed to change cuda context.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__,
+                                      "Failed to change cuda context.");
             }
         }
     }
@@ -96,13 +96,13 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
     void commit(const dft::detail::dft_values<prec, dom>& config_values) override {
         // this could be a recommit
         this->external_workspace_helper_ =
-            oneapi::mkl::dft::detail::external_workspace_helper<prec, dom>(
+            oneapi::math::dft::detail::external_workspace_helper<prec, dom>(
                 config_values.workspace_placement ==
-                oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL);
+                oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL);
         clean_plans();
 
         if (config_values.fwd_scale != 1.0 || config_values.bwd_scale != 1.0) {
-            throw mkl::unimplemented(
+            throw math::unimplemented(
                 "dft/backends/cufft", __FUNCTION__,
                 "cuFFT does not support values other than 1 for FORWARD/BACKWARD_SCALE");
         }
@@ -164,14 +164,14 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
         if constexpr (dom == dft::domain::REAL) {
             if ((a_min != stride_vecs.vec_a.begin() + rank) ||
                 (b_min != stride_vecs.vec_b.begin() + rank)) {
-                throw mkl::unimplemented(
+                throw math::unimplemented(
                     "dft/backends/cufft", __FUNCTION__,
                     "cufft requires the last stride to be the the smallest one for real transforms!");
             }
         }
         else {
             if (a_min - stride_vecs.vec_a.begin() != b_min - stride_vecs.vec_b.begin()) {
-                throw mkl::unimplemented(
+                throw math::unimplemented(
                     "dft/backends/cufft", __FUNCTION__,
                     "cufft requires that if ordered by stride length, the order of strides is the same for input/output or fwd/bwd strides!");
             }
@@ -192,7 +192,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
         }
         for (int i = 1; i < rank; i++) {
             if ((stride_vecs.vec_a[i] % a_stride != 0) || (stride_vecs.vec_b[i] % b_stride != 0)) {
-                throw mkl::unimplemented(
+                throw math::unimplemented(
                     "dft/backends/cufft", __FUNCTION__,
                     "cufft requires a stride to be divisible by all smaller strides!");
             }
@@ -202,7 +202,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
         if (rank > 2) {
             if (stride_vecs.vec_a[1] > stride_vecs.vec_a[2] &&
                 stride_vecs.vec_b[1] < stride_vecs.vec_b[2]) {
-                throw mkl::unimplemented(
+                throw math::unimplemented(
                     "dft/backends/cufft", __FUNCTION__,
                     "cufft requires that if ordered by stride length, the order of strides is the same for input and output strides!");
             }
@@ -215,7 +215,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
             }
             if ((stride_vecs.vec_a[1] % stride_vecs.vec_a[2] != 0) ||
                 (stride_vecs.vec_b[1] % stride_vecs.vec_b[2] != 0)) {
-                throw mkl::unimplemented(
+                throw math::unimplemented(
                     "dft/backends/cufft", __FUNCTION__,
                     "cufft requires a stride to be divisible by all smaller strides!");
             }
@@ -252,14 +252,14 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
                                   : check_stride_validity(stride_vecs.bwd_out, stride_vecs.bwd_in);
 
         if (!valid_forward && !valid_backward) {
-            throw mkl::exception("dft/backends/cufft", __FUNCTION__, "Invalid strides.");
+            throw math::exception("dft/backends/cufft", __FUNCTION__, "Invalid strides.");
         }
 
         if (valid_forward) {
             cufftHandle fwd_plan;
             auto res = cufftCreate(&fwd_plan);
             if (res != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__, "cufftCreate failed.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__, "cufftCreate failed.");
             }
             apply_external_workspace_setting(fwd_plan, config_values.workspace_placement);
             res = cufftPlanMany(&fwd_plan, // plan
@@ -276,8 +276,8 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
             );
 
             if (res != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__,
-                                     "Failed to create forward cuFFT plan.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__,
+                                      "Failed to create forward cuFFT plan.");
             }
 
             plans[0] = fwd_plan;
@@ -287,7 +287,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
             cufftHandle bwd_plan;
             auto res = cufftCreate(&bwd_plan);
             if (res != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__, "cufftCreate failed.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__, "cufftCreate failed.");
             }
             apply_external_workspace_setting(bwd_plan, config_values.workspace_placement);
             // flip fwd_distance and bwd_distance because cuFFt uses input distance and output distance.
@@ -304,8 +304,8 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
                                 batch // batch
             );
             if (res != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", __FUNCTION__,
-                                     "Failed to create backward cuFFT plan.");
+                throw math::exception("dft/backends/cufft", __FUNCTION__,
+                                      "Failed to create backward cuFFT plan.");
             }
             plans[1] = bwd_plan;
         }
@@ -320,8 +320,8 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
         if (workspace_setting == config_value::WORKSPACE_EXTERNAL) {
             auto res = cufftSetAutoAllocation(handle, 0);
             if (res != CUFFT_SUCCESS) {
-                throw mkl::exception("dft/backends/cufft", "commit",
-                                     "cufftSetAutoAllocation(plan, 0) failed.");
+                throw math::exception("dft/backends/cufft", "commit",
+                                      "cufftSetAutoAllocation(plan, 0) failed.");
             }
         }
     }
@@ -357,7 +357,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
                     auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_cuda>();
                     auto result = cufftSetStream(plan, stream);
                     if (result != CUFFT_SUCCESS) {
-                        throw oneapi::mkl::exception(
+                        throw oneapi::math::exception(
                             "dft/backends/cufft", "set_workspace",
                             "cufftSetStream returned " + std::to_string(result));
                     }
@@ -459,4 +459,4 @@ get_offsets_bwd<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>(
     dft::detail::commit_impl<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>*);
 } //namespace detail
 
-} // namespace oneapi::mkl::dft::cufft
+} // namespace oneapi::math::dft::cufft
diff --git a/src/dft/backends/cufft/mkl_dft_cufft_wrappers.cpp b/src/dft/backends/cufft/cufft_wrappers.cpp
similarity index 89%
rename from src/dft/backends/cufft/mkl_dft_cufft_wrappers.cpp
rename to src/dft/backends/cufft/cufft_wrappers.cpp
index 93d3aae11..6cbb9cc99 100644
--- a/src/dft/backends/cufft/mkl_dft_cufft_wrappers.cpp
+++ b/src/dft/backends/cufft/cufft_wrappers.cpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp"
+#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
 #include "dft/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         cufft
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
diff --git a/src/dft/backends/cufft/descriptor.cpp b/src/dft/backends/cufft/descriptor.cpp
index bf26b600f..40d96cdee 100644
--- a/src/dft/backends/cufft/descriptor.cpp
+++ b/src/dft/backends/cufft/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 #include "../../descriptor.cxx"
 
-#include "oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp"
+#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(backend_selector<backend::cufft> selector) {
@@ -42,4 +42,4 @@ template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(
     backend_selector<backend::cufft>);
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(backend_selector<backend::cufft>);
 
-} //namespace oneapi::mkl::dft::detail
+} //namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/cufft/execute_helper.hpp b/src/dft/backends/cufft/execute_helper.hpp
index 88c2e3dba..da485fea2 100644
--- a/src/dft/backends/cufft/execute_helper.hpp
+++ b/src/dft/backends/cufft/execute_helper.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
-#define _ONEMKL_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
+#ifndef _ONEMATH_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
+#define _ONEMATH_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,35 +26,35 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/types.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/types.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 #include <cuda.h>
 #include <cufft.h>
 
-namespace oneapi::mkl::dft::cufft::detail {
+namespace oneapi::math::dft::cufft::detail {
 
 template <dft::precision prec, dft::domain dom>
 inline dft::detail::commit_impl<prec, dom>* checked_get_commit(
     dft::detail::descriptor<prec, dom>& desc) {
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::cufft) {
-        throw mkl::invalid_argument("dft/backends/cufft", "get_commit",
-                                    "DFT descriptor has not been commited for cuFFT");
+        throw math::invalid_argument("dft/backends/cufft", "get_commit",
+                                     "DFT descriptor has not been commited for cuFFT");
     }
     return commit_handle;
 }
 
-/// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+/// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 /// the expected value.
 template <dft::config_param Param, dft::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("dft/backends/cufft", "expect_config", message);
+        throw math::invalid_argument("dft/backends/cufft", "expect_config", message);
     }
 }
 
@@ -73,16 +73,18 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
                 auto result = cufftExecR2C(plan, reinterpret_cast<cufftReal*>(input),
                                            reinterpret_cast<cufftComplex*>(output));
                 if (result != CUFFT_SUCCESS) {
-                    throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                                 "cufftExecR2C returned " + std::to_string(result));
+                    throw oneapi::math::exception(
+                        "dft/backends/cufft", func,
+                        "cufftExecR2C returned " + std::to_string(result));
                 }
             }
             else {
                 auto result = cufftExecD2Z(plan, reinterpret_cast<cufftDoubleReal*>(input),
                                            reinterpret_cast<cufftDoubleComplex*>(output));
                 if (result != CUFFT_SUCCESS) {
-                    throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                                 "cufftExecD2Z returned " + std::to_string(result));
+                    throw oneapi::math::exception(
+                        "dft/backends/cufft", func,
+                        "cufftExecD2Z returned " + std::to_string(result));
                 }
             }
         }
@@ -91,16 +93,18 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
                 auto result = cufftExecC2R(plan, reinterpret_cast<cufftComplex*>(input),
                                            reinterpret_cast<cufftReal*>(output));
                 if (result != CUFFT_SUCCESS) {
-                    throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                                 "cufftExecC2R returned " + std::to_string(result));
+                    throw oneapi::math::exception(
+                        "dft/backends/cufft", func,
+                        "cufftExecC2R returned " + std::to_string(result));
                 }
             }
             else {
                 auto result = cufftExecZ2D(plan, reinterpret_cast<cufftDoubleComplex*>(input),
                                            reinterpret_cast<cufftDoubleReal*>(output));
                 if (result != CUFFT_SUCCESS) {
-                    throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                                 "cufftExecZ2D returned " + std::to_string(result));
+                    throw oneapi::math::exception(
+                        "dft/backends/cufft", func,
+                        "cufftExecZ2D returned " + std::to_string(result));
                 }
             }
         }
@@ -111,8 +115,8 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
                 cufftExecC2C(plan, reinterpret_cast<cufftComplex*>(input),
                              reinterpret_cast<cufftComplex*>(output), static_cast<int>(dir));
             if (result != CUFFT_SUCCESS) {
-                throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                             "cufftExecC2C returned " + std::to_string(result));
+                throw oneapi::math::exception("dft/backends/cufft", func,
+                                              "cufftExecC2C returned " + std::to_string(result));
             }
         }
         else {
@@ -120,8 +124,8 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
                 cufftExecZ2Z(plan, reinterpret_cast<cufftDoubleComplex*>(input),
                              reinterpret_cast<cufftDoubleComplex*>(output), static_cast<int>(dir));
             if (result != CUFFT_SUCCESS) {
-                throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                             "cufftExecZ2Z returned " + std::to_string(result));
+                throw oneapi::math::exception("dft/backends/cufft", func,
+                                              "cufftExecZ2Z returned " + std::to_string(result));
             }
         }
     }
@@ -131,8 +135,8 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
     // as complete early.
     auto result = cuStreamSynchronize(stream);
     if (result != CUDA_SUCCESS) {
-        throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                     "cuStreamSynchronize returned " + std::to_string(result));
+        throw oneapi::math::exception("dft/backends/cufft", func,
+                                      "cuStreamSynchronize returned " + std::to_string(result));
     }
 #endif
 }
@@ -141,12 +145,12 @@ inline CUstream setup_stream(const std::string& func, sycl::interop_handle ih, c
     auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_cuda>();
     auto result = cufftSetStream(plan, stream);
     if (result != CUFFT_SUCCESS) {
-        throw oneapi::mkl::exception("dft/backends/cufft", func,
-                                     "cufftSetStream returned " + std::to_string(result));
+        throw oneapi::math::exception("dft/backends/cufft", func,
+                                      "cufftSetStream returned " + std::to_string(result));
     }
     return stream;
 }
 
-} // namespace oneapi::mkl::dft::cufft::detail
+} // namespace oneapi::math::dft::cufft::detail
 
-#endif // _ONEMKL_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
+#endif // _ONEMATH_DFT_SRC_EXECUTE_HELPER_CUFFT_HPP_
diff --git a/src/dft/backends/cufft/forward.cpp b/src/dft/backends/cufft/forward.cpp
index 0c003b844..6b2867b5f 100644
--- a/src/dft/backends/cufft/forward.cpp
+++ b/src/dft/backends/cufft/forward.cpp
@@ -24,18 +24,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "execute_helper.hpp"
 #include "../../execute_helper_generic.hpp"
 
 #include <cufft.h>
 
-namespace oneapi::mkl::dft::cufft {
+namespace oneapi::math::dft::cufft {
 
 namespace detail {
 //forward declaration
@@ -52,8 +52,8 @@ cufftHandle get_fwd_plan(dft::detail::commit_impl<prec, dom>* commit) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     const std::string func_name = "compute_forward(desc, inout)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -64,7 +64,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[0] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -89,16 +89,17 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
-                                   sycl::buffer<scalar<descriptor_type>, 1>&) {
-    throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT void compute_forward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
+                                    sycl::buffer<scalar<descriptor_type>, 1>&) {
+    throw oneapi::math::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descriptor_type>, 1>& in,
-                                   sycl::buffer<bwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& in,
+                                    sycl::buffer<bwd<descriptor_type>, 1>& out) {
     const std::string func_name = "compute_forward(desc, in, out)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
@@ -109,7 +110,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[0] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -139,20 +140,20 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
-                                   sycl::buffer<scalar<descriptor_type>, 1>&,
-                                   sycl::buffer<scalar<descriptor_type>, 1>&,
-                                   sycl::buffer<scalar<descriptor_type>, 1>&) {
-    throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, in_re, in_im, out_re, out_im)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT void compute_forward(descriptor_type&, sycl::buffer<scalar<descriptor_type>, 1>&,
+                                    sycl::buffer<scalar<descriptor_type>, 1>&,
+                                    sycl::buffer<scalar<descriptor_type>, 1>&,
+                                    sycl::buffer<scalar<descriptor_type>, 1>&) {
+    throw oneapi::math::unimplemented("DFT", "compute_forward(desc, in_re, in_im, out_re, out_im)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //USM version
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                           const std::vector<sycl::event>& dependencies) {
     const std::string func_name = "compute_forward(desc, inout, dependencies)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -163,7 +164,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[0] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -187,19 +188,19 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type&, scalar<descriptor_type>*,
-                                          scalar<descriptor_type>*,
-                                          const std::vector<sycl::event>&) {
-    throw oneapi::mkl::unimplemented("DFT",
-                                     "compute_forward(desc, inout_re, inout_im, dependencies)",
-                                     "cuFFT does not support real-real complex storage.");
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type&, scalar<descriptor_type>*,
+                                           scalar<descriptor_type>*,
+                                           const std::vector<sycl::event>&) {
+    throw oneapi::math::unimplemented("DFT",
+                                      "compute_forward(desc, inout_re, inout_im, dependencies)",
+                                      "cuFFT does not support real-real complex storage.");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
-                                          bwd<descriptor_type>* out,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
+                                           bwd<descriptor_type>* out,
+                                           const std::vector<sycl::event>& dependencies) {
     const std::string func_name = "compute_forward(desc, in, out, dependencies)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
@@ -210,7 +211,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
     if constexpr (std::is_floating_point_v<fwd<descriptor_type>>) {
         if (offsets[0] % 2 != 0) {
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "DFT", func_name,
                 "cuFFT requires offset (first value in strides) to be multiple of 2!");
         }
@@ -233,11 +234,11 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type&, scalar<descriptor_type>*,
-                                          scalar<descriptor_type>*, scalar<descriptor_type>*,
-                                          scalar<descriptor_type>*,
-                                          const std::vector<sycl::event>&) {
-    throw oneapi::mkl::unimplemented(
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type&, scalar<descriptor_type>*,
+                                           scalar<descriptor_type>*, scalar<descriptor_type>*,
+                                           scalar<descriptor_type>*,
+                                           const std::vector<sycl::event>&) {
+    throw oneapi::math::unimplemented(
         "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im, dependencies)",
         "cuFFT does not support real-real complex storage.");
 }
@@ -245,4 +246,4 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type&, scalar<descriptor_ty
 // Template function instantiations
 #include "dft/backends/backend_forward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::cufft
+} // namespace oneapi::math::dft::cufft
diff --git a/src/dft/backends/descriptor.cpp b/src/dft/backends/descriptor.cpp
index 5c3e163ca..9a3d12911 100644
--- a/src/dft/backends/descriptor.cpp
+++ b/src/dft/backends/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
-#include "oneapi/mkl/dft/detail/dft_loader.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
+#include "oneapi/math/dft/detail/dft_loader.hpp"
 
 #include "../descriptor.cxx"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(sycl::queue& queue) {
@@ -39,4 +39,4 @@ template void descriptor<precision::SINGLE, domain::REAL>::commit(sycl::queue&);
 template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(sycl::queue&);
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(sycl::queue&);
 
-} //namespace oneapi::mkl::dft::detail
+} //namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/mklcpu/CMakeLists.txt b/src/dft/backends/mklcpu/CMakeLists.txt
index 6d0f1276d..d79327087 100644
--- a/src/dft/backends/mklcpu/CMakeLists.txt
+++ b/src/dft/backends/mklcpu/CMakeLists.txt
@@ -17,12 +17,13 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_dft_mklcpu)
+set(LIB_NAME onemath_dft_mklcpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 include(WarningsUtils)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   commit.cpp
   descriptor.cpp
@@ -30,37 +31,38 @@ add_library(${LIB_OBJ} OBJECT
   backward.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_cpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_dft ${LIB_NAME})
+add_dependencies(onemath_backend_libs_dft ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_NAME}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES})
 endif()
 
 if(TARGET MKL::MKL_SYCL::DFT)
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_SYCL::DFT
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 else()
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_DPCPP
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 endif()
 
@@ -69,10 +71,10 @@ set_target_properties(${LIB_OBJ} PROPERTIES
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -85,8 +87,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/dft/backends/mklcpu/backward.cpp b/src/dft/backends/mklcpu/backward.cpp
index fe94691bc..a210c247e 100644
--- a/src/dft/backends/mklcpu/backward.cpp
+++ b/src/dft/backends/mklcpu/backward.cpp
@@ -23,18 +23,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/descriptor.hpp"
-#include "oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
+#include "oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "dft/backends/mklcpu/commit_derived_impl.hpp"
 
-// MKLCPU header
-#include "mkl_dfti.h"
+// Intel(R) oneMKL header
+#include <mkl_dfti.h>
 
-namespace oneapi::mkl::dft::mklcpu {
+namespace oneapi::math::dft::mklcpu {
 namespace detail {
 
 // BUFFER version
@@ -43,27 +43,27 @@ template <dft::precision prec, dft::domain dom>
 inline void check_bwd_commit(dft::descriptor<prec, dom>& desc) {
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) {
-        throw mkl::invalid_argument("DFT", "computer_backward",
-                                    "DFT descriptor has not been commited for MKLCPU");
+        throw math::invalid_argument("DFT", "computer_backward",
+                                     "DFT descriptor has not been commited for MKLCPU");
     }
 
     auto mklcpu_desc = reinterpret_cast<detail::mklcpu_desc_t*>(commit_handle->get_handle());
     MKL_LONG commit_status{ DFTI_UNCOMMITTED };
     DftiGetValue(mklcpu_desc[1], DFTI_COMMIT_STATUS, &commit_status);
     if (commit_status != DFTI_COMMITTED) {
-        throw mkl::invalid_argument("DFT", "compute_backward",
-                                    "MKLCPU DFT descriptor was not successfully committed.");
+        throw math::invalid_argument("DFT", "compute_backward",
+                                     "MKLCPU DFT descriptor was not successfully committed.");
     }
 }
 
-// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 // the expected value.
 template <dft::detail::config_param Param, dft::detail::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::detail::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("DFT", "compute_backward", message);
+        throw math::invalid_argument("DFT", "compute_backward", message);
     }
 }
 // convert the base commit class to derived cpu commit class
@@ -77,8 +77,8 @@ auto get_buffer(commit_t<prec, dom>* commit_handle) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     auto commit_handle = dft::detail::get_commit(desc);
@@ -94,7 +94,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
             DFT_ERROR status =
                 DftiComputeBackward(desc_acc[detail::DIR::bwd], detail::acc_to_ptr(inout_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -104,9 +104,9 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -126,7 +126,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
             DFT_ERROR status = DftiComputeBackward(
                 desc_acc[detail::DIR::bwd], detail::acc_to_ptr(re_acc), detail::acc_to_ptr(im_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -136,9 +136,9 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<bwd<descriptor_type>, 1>& in,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<bwd<descriptor_type>, 1>& in,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -159,7 +159,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
             DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], in_ptr,
                                                    detail::acc_to_ptr(out_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -169,11 +169,11 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -198,7 +198,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
                 DftiComputeBackward(desc_acc[detail::DIR::bwd], inre_ptr, inim_ptr,
                                     detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -210,8 +210,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                            const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
 
@@ -227,7 +227,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
         detail::host_task<class host_usm_kernel_back_inplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], inout);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -237,9 +237,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                           scalar<descriptor_type>* inout_im,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc,
+                                            scalar<descriptor_type>* inout_re,
+                                            scalar<descriptor_type>* inout_im,
+                                            const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -255,7 +256,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
         detail::host_task<class host_usm_kernel_split_back_inplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], inout_re, inout_im);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -265,9 +266,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
-                                           fwd<descriptor_type>* out,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
+                                            fwd<descriptor_type>* out,
+                                            const std::vector<sycl::event>& dependencies) {
     // Check: inplace, complex storage
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
@@ -285,7 +286,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
         detail::host_task<class host_usm_kernel_back_outofplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], in, out);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -295,11 +296,11 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                           scalar<descriptor_type>* in_im,
-                                           scalar<descriptor_type>* out_re,
-                                           scalar<descriptor_type>* out_im,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                            scalar<descriptor_type>* in_im,
+                                            scalar<descriptor_type>* out_re,
+                                            scalar<descriptor_type>* out_im,
+                                            const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -316,7 +317,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
             DFT_ERROR status =
                 DftiComputeBackward(desc_acc[detail::DIR::bwd], in_re, in_im, out_re, out_im);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/backends/mklcpu", "compute_backward",
                     std::string("DftiComputeBackward failed : ") + DftiErrorMessage(status));
             }
@@ -327,4 +328,4 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
 // Template function instantiations
 #include "dft/backends/backend_backward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::mklcpu
+} // namespace oneapi::math::dft::mklcpu
diff --git a/src/dft/backends/mklcpu/commit.cpp b/src/dft/backends/mklcpu/commit.cpp
index 1ec8aef9c..a8dca834b 100644
--- a/src/dft/backends/mklcpu/commit.cpp
+++ b/src/dft/backends/mklcpu/commit.cpp
@@ -23,22 +23,24 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
-#include "oneapi/mkl/dft/types.hpp"
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/detail/backends.hpp"
+#include "oneapi/math/dft/types.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
-#include "oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp"
+#include "oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
 
 #include "dft/backends/mklcpu/commit_derived_impl.hpp"
 #include "../stride_helper.hpp"
-#include "mkl_service.h"
-#include "mkl_dfti.h"
+
+// Intel(R) oneMKL headers
+#include <mkl_dfti.h>
+#include <mkl_service.h>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace mklcpu {
 namespace detail {
@@ -46,7 +48,7 @@ namespace detail {
 template <dft::detail::precision prec, dft::detail::domain dom>
 commit_derived_impl<prec, dom>::commit_derived_impl(
     sycl::queue queue, const dft::detail::dft_values<prec, dom>& config_values)
-        : oneapi::mkl::dft::detail::commit_impl<prec, dom>(queue, backend::mklcpu, config_values) {
+        : oneapi::math::dft::detail::commit_impl<prec, dom>(queue, backend::mklcpu, config_values) {
     // create the descriptor once for the lifetime of the descriptor class
     DFT_ERROR status[2] = { DFTI_BAD_DESCRIPTOR, DFTI_BAD_DESCRIPTOR };
 
@@ -66,7 +68,7 @@ commit_derived_impl<prec, dom>::commit_derived_impl(
         std::string err = std::string("DftiCreateDescriptor failed with status : ") +
                           DftiErrorMessage(status[0]) + std::string(", ") +
                           DftiErrorMessage(status[1]);
-        throw oneapi::mkl::exception("dft/backends/mklcpu", "create_descriptor", err);
+        throw oneapi::math::exception("dft/backends/mklcpu", "create_descriptor", err);
     }
 }
 
@@ -81,9 +83,9 @@ template <dft::detail::precision prec, dft::detail::domain dom>
 void commit_derived_impl<prec, dom>::commit(
     const dft::detail::dft_values<prec, dom>& config_values) {
     this->external_workspace_helper_ =
-        oneapi::mkl::dft::detail::external_workspace_helper<prec, dom>(
+        oneapi::math::dft::detail::external_workspace_helper<prec, dom>(
             config_values.workspace_placement ==
-            oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL);
+            oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL);
     set_value(bidirection_handle.data(), config_values);
 
     this->get_queue()
@@ -104,7 +106,7 @@ void commit_derived_impl<prec, dom>::commit(
                     std::string err = std::string("DftiCommitDescriptor failed with status : ") +
                                       DftiErrorMessage(status[0]) + std::string(", ") +
                                       DftiErrorMessage(status[1]);
-                    throw oneapi::mkl::exception("dft/backends/mklcpu", "commit", err);
+                    throw oneapi::math::exception("dft/backends/mklcpu", "commit", err);
                 }
             });
         })
@@ -122,8 +124,8 @@ void commit_derived_impl<prec, dom>::set_value_item(mklcpu_desc_t hand, enum DFT
                                                     Args... args) {
     DFT_ERROR value_err = DftiSetValue(hand, name, args...);
     if (value_err != DFTI_NO_ERROR) {
-        throw oneapi::mkl::exception("dft/backends/mklcpu", "set_value_item",
-                                     DftiErrorMessage(value_err));
+        throw oneapi::math::exception("dft/backends/mklcpu", "set_value_item",
+                                      DftiErrorMessage(value_err));
     }
 }
 
@@ -166,18 +168,18 @@ void commit_derived_impl<prec, dom>::set_value(mklcpu_desc_t* descHandle,
                        to_mklcpu<config_param::PACKED_FORMAT>(config.packed_format));
         // Setting the workspace causes an FFT_INVALID_DESCRIPTOR.
         if (config.workspace != config_value::ALLOW) {
-            throw mkl::invalid_argument("dft/backends/mklcpu", "commit",
-                                        "MKLCPU only supports workspace set to allow");
+            throw math::invalid_argument("dft/backends/mklcpu", "commit",
+                                         "MKLCPU only supports workspace set to allow");
         }
         // Setting the ordering causes an FFT_INVALID_DESCRIPTOR. Check that default is used:
         if (config.ordering != dft::detail::config_value::ORDERED) {
-            throw mkl::invalid_argument("dft/backends/mklcpu", "commit",
-                                        "MKLCPU only supports ordered ordering.");
+            throw math::invalid_argument("dft/backends/mklcpu", "commit",
+                                         "MKLCPU only supports ordered ordering.");
         }
         // Setting the transpose causes an FFT_INVALID_DESCRIPTOR. Check that default is used:
         if (config.transpose != false) {
-            throw mkl::invalid_argument("dft/backends/mklcpu", "commit",
-                                        "MKLCPU only supports non-transposed.");
+            throw math::invalid_argument("dft/backends/mklcpu", "commit",
+                                         "MKLCPU only supports non-transposed.");
         }
     }
 }
@@ -208,5 +210,5 @@ create_commit(
 
 } // namespace mklcpu
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/dft/backends/mklcpu/commit_derived_impl.hpp b/src/dft/backends/mklcpu/commit_derived_impl.hpp
index 3551758a0..6e35b5735 100644
--- a/src/dft/backends/mklcpu/commit_derived_impl.hpp
+++ b/src/dft/backends/mklcpu/commit_derived_impl.hpp
@@ -17,18 +17,18 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_COMMIT_DERIVED_IMPL_HPP_
-#define _ONEMKL_DFT_COMMIT_DERIVED_IMPL_HPP_
+#ifndef _ONEMATH_DFT_COMMIT_DERIVED_IMPL_HPP_
+#define _ONEMATH_DFT_COMMIT_DERIVED_IMPL_HPP_
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 #include "dft/backends/mklcpu/mklcpu_helpers.hpp"
 
-// MKLCPU header
-#include "mkl_dfti.h"
+// Intel(R) oneMKL header
+#include <mkl_dfti.h>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace mklcpu {
 namespace detail {
@@ -82,7 +82,7 @@ using commit_derived_t = detail::commit_derived_impl<prec, dom>;
 } // namespace detail
 } // namespace mklcpu
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_DFT_COMMIT_DERIVED_IMPL_HPP_
+#endif // _ONEMATH_DFT_COMMIT_DERIVED_IMPL_HPP_
diff --git a/src/dft/backends/mklcpu/descriptor.cpp b/src/dft/backends/mklcpu/descriptor.cpp
index a72fdcfc3..a89203717 100644
--- a/src/dft/backends/mklcpu/descriptor.cpp
+++ b/src/dft/backends/mklcpu/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 #include "../../descriptor.cxx"
 
-#include "oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp"
+#include "oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(backend_selector<backend::mklcpu> selector) {
@@ -44,4 +44,4 @@ template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(
     backend_selector<backend::mklcpu>);
 
-} //namespace oneapi::mkl::dft::detail
+} //namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/mklcpu/forward.cpp b/src/dft/backends/mklcpu/forward.cpp
index 5d90b7854..304bb6d01 100644
--- a/src/dft/backends/mklcpu/forward.cpp
+++ b/src/dft/backends/mklcpu/forward.cpp
@@ -23,18 +23,18 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/descriptor.hpp"
-#include "oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
+#include "oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "dft/backends/mklcpu/commit_derived_impl.hpp"
 
-// MKLCPU header
-#include "mkl_dfti.h"
+// Intel(R) oneMKL header
+#include <mkl_dfti.h>
 
-namespace oneapi::mkl::dft::mklcpu {
+namespace oneapi::math::dft::mklcpu {
 namespace detail {
 
 // BUFFER version
@@ -43,27 +43,27 @@ template <dft::precision prec, dft::domain dom>
 inline void check_fwd_commit(dft::descriptor<prec, dom>& desc) {
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) {
-        throw mkl::invalid_argument("DFT", "computer_forward",
-                                    "DFT descriptor has not been commited for MKLCPU");
+        throw math::invalid_argument("DFT", "computer_forward",
+                                     "DFT descriptor has not been commited for MKLCPU");
     }
 
     auto mklcpu_desc = reinterpret_cast<detail::mklcpu_desc_t*>(commit_handle->get_handle());
     MKL_LONG commit_status{ DFTI_UNCOMMITTED };
     DftiGetValue(mklcpu_desc[0], DFTI_COMMIT_STATUS, &commit_status);
     if (commit_status != DFTI_COMMITTED) {
-        throw mkl::invalid_argument("DFT", "compute_forward",
-                                    "MKLCPU DFT descriptor was not successfully committed.");
+        throw math::invalid_argument("DFT", "compute_forward",
+                                     "MKLCPU DFT descriptor was not successfully committed.");
     }
 }
 
-// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 // the expected value.
 template <dft::detail::config_param Param, dft::detail::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::detail::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("DFT", "compute_forward", message);
+        throw math::invalid_argument("DFT", "compute_forward", message);
     }
 }
 
@@ -78,8 +78,8 @@ auto get_buffer(commit_t<prec, dom>* commit_handle) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     auto commit_handle = dft::detail::get_commit(desc);
@@ -95,7 +95,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
             DFT_ERROR status =
                 DftiComputeForward(desc_acc[detail::DIR::fwd], detail::acc_to_ptr(inout_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -105,9 +105,9 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -127,7 +127,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
             DFT_ERROR status = DftiComputeForward(
                 desc_acc[detail::DIR::fwd], detail::acc_to_ptr(re_acc), detail::acc_to_ptr(im_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -137,8 +137,9 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descriptor_type>, 1>& in,
-                                   sycl::buffer<bwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& in,
+                                    sycl::buffer<bwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -159,7 +160,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
             DFT_ERROR status =
                 DftiComputeForward(desc_acc[detail::DIR::fwd], in_ptr, detail::acc_to_ptr(out_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -169,11 +170,11 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -198,7 +199,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
                 DftiComputeForward(desc_acc[detail::DIR::fwd], inre_ptr, inim_ptr,
                                    detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc));
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -210,8 +211,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                           const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
 
@@ -228,7 +229,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
         detail::host_task<class host_usm_kernel_inplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], inout);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -238,9 +239,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                          scalar<descriptor_type>* inout_im,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
+                                           scalar<descriptor_type>* inout_im,
+                                           const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -258,7 +259,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
         detail::host_task<class host_usm_kernel_split_inplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], inout_re, inout_im);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -268,9 +269,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
-                                          bwd<descriptor_type>* out,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
+                                           bwd<descriptor_type>* out,
+                                           const std::vector<sycl::event>& dependencies) {
     // Check: inplace
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
@@ -289,7 +290,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
         detail::host_task<class host_usm_kernel_outofplace>(cgh, [=]() {
             DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], in, out);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -299,11 +300,11 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                          scalar<descriptor_type>* in_im,
-                                          scalar<descriptor_type>* out_re,
-                                          scalar<descriptor_type>* out_im,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                           scalar<descriptor_type>* in_im,
+                                           scalar<descriptor_type>* out_re,
+                                           scalar<descriptor_type>* out_im,
+                                           const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
@@ -322,7 +323,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
             DFT_ERROR status =
                 DftiComputeForward(desc_acc[detail::DIR::fwd], in_re, in_im, out_re, out_im);
             if (status != DFTI_NO_ERROR) {
-                throw oneapi::mkl::exception(
+                throw oneapi::math::exception(
                     "dft/forward/mklcpu", "compute_forward",
                     std::string("DftiComputeForward failed : ") + DftiErrorMessage(status));
             }
@@ -333,4 +334,4 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
 // Template function instantiations
 #include "dft/backends/backend_forward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::mklcpu
+} // namespace oneapi::math::dft::mklcpu
diff --git a/src/dft/backends/mklcpu/mkl_dft_cpu_wrappers.cpp b/src/dft/backends/mklcpu/mkl_dft_cpu_wrappers.cpp
index 463ab80f4..3c8193f3b 100644
--- a/src/dft/backends/mklcpu/mkl_dft_cpu_wrappers.cpp
+++ b/src/dft/backends/mklcpu/mkl_dft_cpu_wrappers.cpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/mklcpu/onemkl_dft_mklcpu.hpp"
+#include "oneapi/math/dft/detail/mklcpu/onemath_dft_mklcpu.hpp"
 #include "dft/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         mklcpu
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
diff --git a/src/dft/backends/mklcpu/mklcpu_helpers.hpp b/src/dft/backends/mklcpu/mklcpu_helpers.hpp
index 55a8345c2..c3dc4db4b 100644
--- a/src/dft/backends/mklcpu/mklcpu_helpers.hpp
+++ b/src/dft/backends/mklcpu/mklcpu_helpers.hpp
@@ -17,16 +17,16 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_MKLCPU_HELPERS_HPP_
-#define _ONEMKL_DFT_SRC_MKLCPU_HELPERS_HPP_
+#ifndef _ONEMATH_DFT_SRC_MKLCPU_HELPERS_HPP_
+#define _ONEMATH_DFT_SRC_MKLCPU_HELPERS_HPP_
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-// MKLCPU header
-#include "mkl_dfti.h"
+// Intel(R) oneMKL header
+#include <mkl_dfti.h>
 
-namespace oneapi::mkl::dft::mklcpu::detail {
+namespace oneapi::math::dft::mklcpu::detail {
 
 template <typename K, typename H, typename F>
 static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) {
@@ -82,8 +82,8 @@ inline constexpr DFTI_CONFIG_PARAM to_mklcpu(dft::detail::config_param param) {
         case iparam::PACKED_FORMAT: return DFTI_PACKED_FORMAT;
         case iparam::COMMIT_STATUS: return DFTI_COMMIT_STATUS;
         default:
-            throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                        "Invalid config param.");
+            throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                         "Invalid config param.");
             return static_cast<DFTI_CONFIG_PARAM>(0);
     }
 }
@@ -105,8 +105,8 @@ inline constexpr int to_mklcpu<dft::detail::config_param::COMPLEX_STORAGE>(
         return DFTI_REAL_REAL;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                    "Invalid config value for complex storage.");
+        throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                     "Invalid config value for complex storage.");
         return 0;
     }
 }
@@ -118,8 +118,8 @@ inline constexpr int to_mklcpu<dft::detail::config_param::REAL_STORAGE>(
         return DFTI_REAL_REAL;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                    "Invalid config value for real storage.");
+        throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                     "Invalid config value for real storage.");
         return 0;
     }
 }
@@ -130,8 +130,8 @@ inline constexpr int to_mklcpu<dft::detail::config_param::CONJUGATE_EVEN_STORAGE
         return DFTI_COMPLEX_COMPLEX;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                    "Invalid config value for conjugate even storage.");
+        throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                     "Invalid config value for conjugate even storage.");
         return 0;
     }
 }
@@ -146,8 +146,8 @@ inline constexpr int to_mklcpu<dft::detail::config_param::PLACEMENT>(
         return DFTI_NOT_INPLACE;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                    "Invalid config value for inplace.");
+        throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                     "Invalid config value for inplace.");
         return 0;
     }
 }
@@ -159,8 +159,8 @@ inline constexpr int to_mklcpu<dft::detail::config_param::PACKED_FORMAT>(
         return DFTI_CCE_FORMAT;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLCPU descriptor set_value()",
-                                    "Invalid config value for packed format.");
+        throw math::invalid_argument("dft", "MKLCPU descriptor set_value()",
+                                     "Invalid config value for packed format.");
         return 0;
     }
 }
@@ -173,6 +173,6 @@ typename AccType::value_type* acc_to_ptr(AccType acc) {
     return acc.template get_multi_ptr<sycl::access::decorated::no>().get();
 }
 
-} // namespace oneapi::mkl::dft::mklcpu::detail
+} // namespace oneapi::math::dft::mklcpu::detail
 
-#endif // _ONEMKL_DFT_SRC_MKLCPU_HELPERS_HPP_
+#endif // _ONEMATH_DFT_SRC_MKLCPU_HELPERS_HPP_
diff --git a/src/dft/backends/mklgpu/CMakeLists.txt b/src/dft/backends/mklgpu/CMakeLists.txt
index 8ec322de8..dfa9d5097 100644
--- a/src/dft/backends/mklgpu/CMakeLists.txt
+++ b/src/dft/backends/mklgpu/CMakeLists.txt
@@ -17,12 +17,13 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_dft_mklgpu)
+set(LIB_NAME onemath_dft_mklgpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 include(WarningsUtils)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   descriptor.cpp
   commit.cpp
@@ -30,51 +31,46 @@ add_library(${LIB_OBJ} OBJECT
   backward.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_gpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_dft ${LIB_NAME})
+add_dependencies(onemath_backend_libs_dft ${LIB_NAME})
 
 target_include_directories(${LIB_NAME}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
-)
-
-# Due to using the same file name for different headers in this library and in
-# the Intel(R) oneAPI Math Kernel Library, we force the compiler to follow C++
-# Core Guideline SF.12 using the flag "-iquote" to avoid conflicts and find the
-# correct header.
-target_compile_options(${LIB_OBJ}
-  BEFORE PRIVATE -iquote $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 
 target_include_directories(${LIB_OBJ}
-  PRIVATE ${PROJECT_SOURCE_DIR}/src
+  PRIVATE ${PROJECT_SOURCE_DIR}/include
+          ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::DFT)
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_SYCL::DFT
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 else()
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_DPCPP
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as non-transitive for dynamic (shared) builds
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -87,8 +83,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/dft/backends/mklgpu/backward.cpp b/src/dft/backends/mklgpu/backward.cpp
index c0648fef3..4af5c7005 100644
--- a/src/dft/backends/mklgpu/backward.cpp
+++ b/src/dft/backends/mklgpu/backward.cpp
@@ -23,58 +23,60 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
 
+#include "common_onemkl_conversion.hpp"
 #include "mklgpu_helpers.hpp"
 
-#include "mkl_version.h"
-// MKLGPU header
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
 #if INTEL_MKL_VERSION < 20250000
-#include <oneapi/mkl/dfti.hpp>
+#include <mkl/dfti.hpp>
 #else
-#include <oneapi/mkl/dft.hpp>
+#include <mkl/dft.hpp>
 #endif
 
-namespace oneapi::mkl::dft::mklgpu {
+namespace oneapi::math::dft::mklgpu {
 namespace detail {
 
 /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid.
 /// Assumes backend descriptor values match those of the frontend.
 template <dft::detail::precision prec, dft::detail::domain dom, typename... ArgTs>
 inline auto compute_backward(dft::detail::descriptor<prec, dom>& desc, ArgTs&&... args) {
-    using mklgpu_desc_t = dft::descriptor<to_mklgpu(prec), to_mklgpu(dom)>;
+    using mklgpu_desc_t = oneapi::mkl::dft::descriptor<to_mklgpu(prec), to_mklgpu(dom)>;
     using desc_shptr_t = std::shared_ptr<mklgpu_desc_t>;
     using handle_t = std::pair<desc_shptr_t, desc_shptr_t>;
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) {
-        throw mkl::invalid_argument("DFT", "compute_backward",
-                                    "DFT descriptor has not been commited for MKLGPU");
+        throw math::invalid_argument("DFT", "compute_backward",
+                                     "DFT descriptor has not been commited for MKLGPU");
     }
     auto handle = reinterpret_cast<handle_t*>(commit_handle->get_handle());
     auto mklgpu_desc = handle->second; // Second because backward DFT.
     int commit_status{ DFTI_UNCOMMITTED };
-    mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status);
+    mklgpu_desc->get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status);
     if (commit_status != DFTI_COMMITTED) {
-        throw mkl::invalid_argument("DFT", "compute_backward",
-                                    "MKLGPU DFT descriptor was not successfully committed.");
+        throw math::invalid_argument("DFT", "compute_backward",
+                                     "MKLGPU DFT descriptor was not successfully committed.");
     }
-    // The MKLGPU backend's iterface contains fewer function signatures than in this
+    // The MKLGPU backend's interface contains fewer function signatures than in this
     // open-source library. Consequently, it is not required to forward template arguments
     // to resolve to the correct function.
-    return dft::compute_backward(*mklgpu_desc, std::forward<ArgTs>(args)...);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        oneapi::mkl::dft::compute_backward(*mklgpu_desc, std::forward<ArgTs>(args)...));
 }
 
-/// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+/// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 /// the expected value.
 template <dft::detail::config_param Param, dft::detail::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::detail::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("DFT", "compute_backward", message);
+        throw math::invalid_argument("DFT", "compute_backward", message);
     }
 }
 } // namespace detail
@@ -83,8 +85,8 @@ inline auto expect_config(DescT& desc, const char* message) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     return detail::compute_backward(desc, inout);
@@ -92,18 +94,19 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& /*desc*/,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*inout_re*/,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*inout_im*/) {
-    throw mkl::unimplemented("DFT", "compute_backward",
-                             "MKLGPU does not support compute_backward(desc, inout_re, inout_im).");
+ONEMATH_EXPORT void compute_backward(descriptor_type& /*desc*/,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*inout_re*/,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*inout_im*/) {
+    throw math::unimplemented(
+        "DFT", "compute_backward",
+        "MKLGPU does not support compute_backward(desc, inout_re, inout_im).");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<bwd<descriptor_type>, 1>& in,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<bwd<descriptor_type>, 1>& in,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -112,15 +115,15 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*in_re*/,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*in_im*/,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*out_re*/,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& /*out_im*/) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*in_re*/,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*in_im*/,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*out_re*/,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& /*out_im*/) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
-    throw oneapi::mkl::unimplemented(
+    throw oneapi::math::unimplemented(
         "DFT", "compute_backward(desc, in_re, in_im, out_re, out_im)",
         "MKLGPU does not support out-of-place FFT with real-real complex storage.");
 }
@@ -129,8 +132,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                            const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     return detail::compute_backward(desc, inout, dependencies);
@@ -138,20 +141,20 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& /*desc*/,
-                                           scalar<descriptor_type>* /*inout_re*/,
-                                           scalar<descriptor_type>* /*inout_im*/,
-                                           const std::vector<sycl::event>& /*dependencies*/) {
-    throw mkl::unimplemented(
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& /*desc*/,
+                                            scalar<descriptor_type>* /*inout_re*/,
+                                            scalar<descriptor_type>* /*inout_im*/,
+                                            const std::vector<sycl::event>& /*dependencies*/) {
+    throw math::unimplemented(
         "DFT", "compute_backward",
         "MKLGPU does not support compute_backward(desc, inout_re, inout_im, dependencies).");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
-                                           fwd<descriptor_type>* out,
-                                           const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
+                                            fwd<descriptor_type>* out,
+                                            const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -160,16 +163,16 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc,
-                                           scalar<descriptor_type>* /*in_re*/,
-                                           scalar<descriptor_type>* /*in_im*/,
-                                           scalar<descriptor_type>* /*out_re*/,
-                                           scalar<descriptor_type>* /*out_im*/,
-                                           const std::vector<sycl::event>& /*dependencies*/) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc,
+                                            scalar<descriptor_type>* /*in_re*/,
+                                            scalar<descriptor_type>* /*in_im*/,
+                                            scalar<descriptor_type>* /*out_re*/,
+                                            scalar<descriptor_type>* /*out_im*/,
+                                            const std::vector<sycl::event>& /*dependencies*/) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
-    throw oneapi::mkl::unimplemented(
+    throw oneapi::math::unimplemented(
         "DFT", "compute_backward(desc, in_re, in_im, out_re, out_im, deps)",
         "MKLGPU does not support out-of-place FFT with real-real complex storage.");
 }
@@ -177,4 +180,4 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc,
 // Template function instantiations
 #include "dft/backends/backend_backward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::mklgpu
+} // namespace oneapi::math::dft::mklgpu
diff --git a/src/dft/backends/mklgpu/commit.cpp b/src/dft/backends/mklgpu/commit.cpp
index 8405c3891..c92f9667c 100644
--- a/src/dft/backends/mklgpu/commit.cpp
+++ b/src/dft/backends/mklgpu/commit.cpp
@@ -23,52 +23,54 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/detail/backends.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/detail/backends.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp"
 
+#include "common_onemkl_conversion.hpp"
 #include "dft/backends/mklgpu/mklgpu_helpers.hpp"
 #include "../stride_helper.hpp"
 
-#include "mkl_version.h"
-// MKLGPU header
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
 #if INTEL_MKL_VERSION < 20250000
-#include <oneapi/mkl/dfti.hpp>
+#include <mkl/dfti.hpp>
 #else
-#include <oneapi/mkl/dft.hpp>
+#include <mkl/dft.hpp>
 #endif
 
 // Intel oneMKL 2024.1 deprecates input/output strides.
+#include <mkl_version.h>
 #if INTEL_MKL_VERSION < 20240001
-#error MKLGPU requires oneMKL 2024.1 or later
+#error MKLGPU requires Intel oneMKL 2024.1 or later
 #endif
 
 /**
-Note that in this file, the Intel oneMKL closed-source library's interface mirrors the interface
-of this OneMKL open-source library. Consequently, the types under dft::TYPE are closed-source oneMKL types,
-and types under dft::detail::TYPE are from this library.
+Note that in this file, the Intel oneMKL-GPU library's interface mirrors the
+interface of this oneMath library. Consequently, types under oneapi::mkl::dft::TYPE
+are Intel oneMKL types, and types under dft::detail::TYPE are from this library.
 **/
 
-namespace oneapi::mkl::dft::mklgpu {
+namespace oneapi::math::dft::mklgpu {
 namespace detail {
 
 /// Commit impl class specialization for MKLGPU.
 template <dft::detail::precision prec, dft::detail::domain dom>
 class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
 private:
-    // Equivalent MKLGPU precision and domain from OneMKL's precision / domain.
-    static constexpr dft::precision mklgpu_prec = to_mklgpu(prec);
-    static constexpr dft::domain mklgpu_dom = to_mklgpu(dom);
+    // Equivalent MKLGPU precision and domain from oneMath's precision / domain.
+    static constexpr oneapi::mkl::dft::precision mklgpu_prec = to_mklgpu(prec);
+    static constexpr oneapi::mkl::dft::domain mklgpu_dom = to_mklgpu(dom);
 
     // A pair of descriptors are needed because of the [[deprecated]]IN/OUTPUT_STRIDES vs F/BWD_STRIDES API.
-    // Of the pair [0] is fwd DFT, [1] is backward DFT. If possible, the pointers refer to the same desciptor.
+    // Of the pair [0] is fwd DFT, [1] is backward DFT. If possible, the pointers refer to the same descriptor.
     // Both pointers must be valid.
-    using mklgpu_descriptor_t = dft::descriptor<mklgpu_prec, mklgpu_dom>;
+    using mklgpu_descriptor_t = oneapi::mkl::dft::descriptor<mklgpu_prec, mklgpu_dom>;
     using descriptor_shptr_t = std::shared_ptr<mklgpu_descriptor_t>;
     using handle_t = std::pair<descriptor_shptr_t, descriptor_shptr_t>;
 
@@ -76,65 +78,43 @@ class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
 
 public:
     mklgpu_commit(sycl::queue queue, const dft::detail::dft_values<prec, dom>& config_values)
-            : oneapi::mkl::dft::detail::commit_impl<prec, dom>(queue, backend::mklgpu,
-                                                               config_values),
+            : oneapi::math::dft::detail::commit_impl<prec, dom>(queue, backend::mklgpu,
+                                                                config_values),
               handle(std::make_shared<mklgpu_descriptor_t>(config_values.dimensions), nullptr) {
         handle.second = handle.first; // Make sure the bwd pointer is valid.
         // MKLGPU does not throw an informative exception for the following:
         if constexpr (prec == dft::detail::precision::DOUBLE) {
             if (!queue.get_device().has(sycl::aspect::fp64)) {
-                throw mkl::exception("dft/backends/mklgpu", "commit",
-                                     "Device does not support double precision.");
+                throw math::exception("dft/backends/mklgpu", "commit",
+                                      "Device does not support double precision.");
             }
         }
     }
 
     virtual void commit(const dft::detail::dft_values<prec, dom>& config_values) override {
         this->external_workspace_helper_ =
-            oneapi::mkl::dft::detail::external_workspace_helper<prec, dom>(
+            oneapi::math::dft::detail::external_workspace_helper<prec, dom>(
                 config_values.workspace_placement ==
-                oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL);
+                oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL);
 
         auto stride_choice = dft::detail::get_stride_api(config_values);
         throw_on_invalid_stride_api("MKLGPU commit", stride_choice);
         // A separate descriptor for each direction may not be required.
         bool one_descriptor = (stride_choice == dft::detail::stride_api::FB_STRIDES) ||
                               (config_values.input_strides == config_values.output_strides);
-        bool forward_good = true;
         // Make sure that second is always pointing to something new if this is a recommit.
         handle.second = handle.first;
 
         // Generate forward DFT descriptor. If using FWD/BWD_STRIDES API, only
         // one descriptor is needed.
         set_value(*handle.first, config_values, true, stride_choice);
-        try {
-            handle.first->commit(this->get_queue());
-        }
-        catch (const std::exception& mkl_exception) {
-            // Catching the real Intel oneMKL exception causes headaches with naming
-            forward_good = false;
-            if (one_descriptor) {
-                throw mkl::exception("dft/backends/mklgpu"
-                                     "commit",
-                                     mkl_exception.what());
-            }
-        }
+        RETHROW_ONEMKL_EXCEPTIONS(handle.first->commit(this->get_queue()));
 
         // Generate backward DFT descriptor only if required.
         if (!one_descriptor) {
             handle.second = std::make_shared<mklgpu_descriptor_t>(config_values.dimensions);
             set_value(*handle.second, config_values, false, stride_choice);
-            try {
-                handle.second->commit(this->get_queue());
-            }
-            catch (const std::exception& mkl_exception) {
-                // Catching the real Intel oneMKL exception causes headaches with naming.
-                if (!forward_good) {
-                    throw mkl::exception("dft/backends/mklgpu"
-                                         "commit",
-                                         mkl_exception.what());
-                }
-            }
+            RETHROW_ONEMKL_EXCEPTIONS(handle.second->commit(this->get_queue()));
         }
     }
 
@@ -146,17 +126,17 @@ class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
 
     virtual void set_workspace(scalar_type* usm_workspace) override {
         this->external_workspace_helper_.set_workspace_throw(*this, usm_workspace);
-        handle.first->set_workspace(usm_workspace);
+        RETHROW_ONEMKL_EXCEPTIONS(handle.first->set_workspace(usm_workspace));
         if (handle.first != handle.second) {
-            handle.second->set_workspace(usm_workspace);
+            RETHROW_ONEMKL_EXCEPTIONS(handle.second->set_workspace(usm_workspace));
         }
     }
 
     virtual void set_workspace(sycl::buffer<scalar_type>& buffer_workspace) override {
         this->external_workspace_helper_.set_workspace_throw(*this, buffer_workspace);
-        handle.first->set_workspace(buffer_workspace);
+        RETHROW_ONEMKL_EXCEPTIONS(handle.first->set_workspace(buffer_workspace));
         if (handle.first != handle.second) {
-            handle.second->set_workspace(buffer_workspace);
+            RETHROW_ONEMKL_EXCEPTIONS(handle.second->set_workspace(buffer_workspace));
         }
     }
 
@@ -170,8 +150,8 @@ class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
 
     void set_value(mklgpu_descriptor_t& desc, const dft::detail::dft_values<prec, dom>& config,
                    bool assume_fwd_dft, dft::detail::stride_api stride_choice) {
-        using onemkl_param = dft::detail::config_param;
-        using backend_param = dft::config_param;
+        using onemath_param = dft::detail::config_param;
+        using backend_param = oneapi::mkl::dft::config_param;
 
         // The following are read-only:
         // Dimension, forward domain, precision, commit status.
@@ -180,28 +160,28 @@ class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
         desc.set_value(backend_param::BACKWARD_SCALE, config.bwd_scale);
         desc.set_value(backend_param::NUMBER_OF_TRANSFORMS, config.number_of_transforms);
         desc.set_value(backend_param::COMPLEX_STORAGE,
-                       to_mklgpu<onemkl_param::COMPLEX_STORAGE>(config.complex_storage));
+                       to_mklgpu<onemath_param::COMPLEX_STORAGE>(config.complex_storage));
         if (config.real_storage != dft::detail::config_value::REAL_REAL) {
-            throw mkl::invalid_argument("dft/backends/mklgpu", "commit",
-                                        "MKLGPU only supports real-real real storage.");
+            throw math::invalid_argument("dft/backends/mklgpu", "commit",
+                                         "MKLGPU only supports real-real real storage.");
         }
         desc.set_value(backend_param::CONJUGATE_EVEN_STORAGE,
-                       to_mklgpu<onemkl_param::CONJUGATE_EVEN_STORAGE>(config.conj_even_storage));
+                       to_mklgpu<onemath_param::CONJUGATE_EVEN_STORAGE>(config.conj_even_storage));
         desc.set_value(backend_param::PLACEMENT,
-                       to_mklgpu<onemkl_param::PLACEMENT>(config.placement));
+                       to_mklgpu<onemath_param::PLACEMENT>(config.placement));
 
         if (stride_choice == dft::detail::stride_api::FB_STRIDES) {
-            if (config.fwd_strides[0] != 0 || config.fwd_strides[0] != 0) {
+            if (config.fwd_strides[0] != 0 || config.bwd_strides[0] != 0) {
-                throw mkl::unimplemented("dft/backends/mklgpu", "commit",
-                                         "MKLGPU does not support nonzero offsets.");
+                throw math::unimplemented("dft/backends/mklgpu", "commit",
+                                          "MKLGPU does not support nonzero offsets.");
             }
             desc.set_value(backend_param::FWD_STRIDES, config.fwd_strides.data());
             desc.set_value(backend_param::BWD_STRIDES, config.bwd_strides.data());
         }
         else {
             if (config.input_strides[0] != 0 || config.output_strides[0] != 0) {
-                throw mkl::unimplemented("dft/backends/mklgpu", "commit",
-                                         "MKLGPU does not support nonzero offsets.");
+                throw math::unimplemented("dft/backends/mklgpu", "commit",
+                                          "MKLGPU does not support nonzero offsets.");
             }
             if (assume_fwd_dft) {
                 desc.set_value(backend_param::FWD_STRIDES, config.input_strides.data());
@@ -217,26 +197,27 @@ class mklgpu_commit final : public dft::detail::commit_impl<prec, dom> {
         if (config.workspace_placement == dft::detail::config_value::WORKSPACE_EXTERNAL) {
             // Setting WORKSPACE_INTERNAL (default) causes FFT_INVALID_DESCRIPTOR.
             desc.set_value(backend_param::WORKSPACE,
-                           to_mklgpu_config_value<onemkl_param::WORKSPACE_PLACEMENT>(
+                           to_mklgpu_config_value<onemath_param::WORKSPACE_PLACEMENT>(
                                config.workspace_placement));
         }
         // Setting the ordering causes an FFT_INVALID_DESCRIPTOR. Check that default is used:
         if (config.ordering != dft::detail::config_value::ORDERED) {
-            throw mkl::invalid_argument("dft/backends/mklgpu", "commit",
-                                        "MKLGPU only supports ordered ordering.");
+            throw math::invalid_argument("dft/backends/mklgpu", "commit",
+                                         "MKLGPU only supports ordered ordering.");
         }
         // Setting the transpose causes an FFT_INVALID_DESCRIPTOR. Check that default is used:
         if (config.transpose != false) {
-            throw mkl::invalid_argument("dft/backends/mklgpu", "commit",
-                                        "MKLGPU only supports non-transposed.");
+            throw math::invalid_argument("dft/backends/mklgpu", "commit",
+                                         "MKLGPU only supports non-transposed.");
         }
     }
 
     // This is called by the workspace_helper, and is not part of the user API.
     virtual std::int64_t get_workspace_external_bytes_impl() override {
         std::size_t workspaceSizeFwd = 0, workspaceSizeBwd = 0;
-        handle.first->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeFwd);
-        handle.second->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeBwd);
+        using backend_param = oneapi::mkl::dft::config_param;
+        handle.first->get_value(backend_param::WORKSPACE_BYTES, &workspaceSizeFwd);
+        handle.second->get_value(backend_param::WORKSPACE_BYTES, &workspaceSizeBwd);
-        return static_cast<std::int64_t>(std::max(workspaceSizeFwd, workspaceSizeFwd));
+        return static_cast<std::int64_t>(std::max(workspaceSizeFwd, workspaceSizeBwd));
     }
 };
@@ -265,4 +246,4 @@ create_commit(
     const dft::detail::descriptor<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>&,
     sycl::queue&);
 
-} // namespace oneapi::mkl::dft::mklgpu
+} // namespace oneapi::math::dft::mklgpu
diff --git a/src/dft/backends/mklgpu/descriptor.cpp b/src/dft/backends/mklgpu/descriptor.cpp
index 7f7f0bf70..8a0fee21b 100644
--- a/src/dft/backends/mklgpu/descriptor.cpp
+++ b/src/dft/backends/mklgpu/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 #include "../../descriptor.cxx"
 
-#include "oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp"
+#include "oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(backend_selector<backend::mklgpu> selector) {
@@ -44,4 +44,4 @@ template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(
     backend_selector<backend::mklgpu>);
 
-} //namespace oneapi::mkl::dft::detail
+} //namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/mklgpu/forward.cpp b/src/dft/backends/mklgpu/forward.cpp
index fb526eee9..f4ce97b7b 100644
--- a/src/dft/backends/mklgpu/forward.cpp
+++ b/src/dft/backends/mklgpu/forward.cpp
@@ -24,64 +24,65 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
 
+#include "common_onemkl_conversion.hpp"
 #include "mklgpu_helpers.hpp"
 
-#include "mkl_version.h"
-// MKLGPU header
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
 #if INTEL_MKL_VERSION < 20250000
-#include <oneapi/mkl/dfti.hpp>
+#include <mkl/dfti.hpp>
 #else
-#include <oneapi/mkl/dft.hpp>
+#include <mkl/dft.hpp>
 #endif
 
 /**
 Note that in this file, the Intel oneMKL-GPU library's interface mirrors the
-interface of this OneMKL library. Consequently, the types under dft::TYPE are
-closed-source Intel oneMKL types, and types under dft::detail::TYPE are from
-this library.
+interface of this oneMath library. Consequently, types under oneapi::mkl::dft::TYPE
+are Intel oneMKL types, and types under dft::detail::TYPE are from this library.
 **/
 
-namespace oneapi::mkl::dft::mklgpu {
+namespace oneapi::math::dft::mklgpu {
 namespace detail {
 /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid.
 /// Assumes backend descriptor values match those of the frontend.
 template <dft::detail::precision prec, dft::detail::domain dom, typename... ArgTs>
 inline auto compute_forward(dft::detail::descriptor<prec, dom>& desc, ArgTs&&... args) {
-    using mklgpu_desc_t = dft::descriptor<to_mklgpu(prec), to_mklgpu(dom)>;
+    using mklgpu_desc_t = oneapi::mkl::dft::descriptor<to_mklgpu(prec), to_mklgpu(dom)>;
     using desc_shptr_t = std::shared_ptr<mklgpu_desc_t>;
     using handle_t = std::pair<desc_shptr_t, desc_shptr_t>;
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) {
-        throw mkl::invalid_argument("DFT", "compute_forward",
-                                    "DFT descriptor has not been commited for MKLGPU");
+        throw math::invalid_argument("DFT", "compute_forward",
+                                     "DFT descriptor has not been commited for MKLGPU");
     }
     auto handle = reinterpret_cast<handle_t*>(commit_handle->get_handle());
     auto mklgpu_desc = handle->first; // First because forward DFT.
     int commit_status{ DFTI_UNCOMMITTED };
-    mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status);
+    mklgpu_desc->get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status);
     if (commit_status != DFTI_COMMITTED) {
-        throw mkl::invalid_argument("DFT", "compute_forward",
-                                    "MKLGPU DFT descriptor was not successfully committed.");
+        throw math::invalid_argument("DFT", "compute_forward",
+                                     "MKLGPU DFT descriptor was not successfully committed.");
     }
-    // The MKLGPU backend's iterface contains fewer function signatures than in this
+    // The MKLGPU backend's interface contains fewer function signatures than in this
     // open-source library. Consequently, it is not required to forward template arguments
     // to resolve to the correct function.
-    return dft::compute_forward(*mklgpu_desc, std::forward<ArgTs>(args)...);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        oneapi::mkl::dft::compute_forward(*mklgpu_desc, std::forward<ArgTs>(args)...));
 }
 
-/// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+/// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 /// the expected value.
 template <dft::detail::config_param Param, dft::detail::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::detail::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("DFT", "compute_forward", message);
+        throw math::invalid_argument("DFT", "compute_forward", message);
     }
 }
 } // namespace detail
@@ -90,8 +91,8 @@ inline auto expect_config(DescT& desc, const char* message) {
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     return detail::compute_forward(desc, inout);
@@ -99,17 +100,18 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& /*desc*/,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*inout_re*/,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*inout_im*/) {
-    throw mkl::unimplemented("DFT", "compute_forward",
-                             "MKLGPU does not support compute_forward(desc, inout_re, inout_im).");
+ONEMATH_EXPORT void compute_forward(descriptor_type& /*desc*/,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*inout_re*/,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*inout_im*/) {
+    throw math::unimplemented("DFT", "compute_forward",
+                              "MKLGPU does not support compute_forward(desc, inout_re, inout_im).");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descriptor_type>, 1>& in,
-                                   sycl::buffer<bwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& in,
+                                    sycl::buffer<bwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -118,15 +120,15 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*in_re*/,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*in_im*/,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*out_re*/,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& /*out_im*/) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*in_re*/,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*in_im*/,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*out_re*/,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& /*out_im*/) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
-    throw oneapi::mkl::unimplemented(
+    throw oneapi::math::unimplemented(
         "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im)",
         "MKLGPU does not support out-of-place FFT with real-real complex storage.");
 }
@@ -135,8 +137,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                           const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT, dft::detail::config_value::INPLACE>(
         desc, "Unexpected value for placement");
     return detail::compute_forward(desc, inout, dependencies);
@@ -144,20 +146,20 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& /*desc*/,
-                                          scalar<descriptor_type>* /*inout_re*/,
-                                          scalar<descriptor_type>* /*inout_im*/,
-                                          const std::vector<sycl::event>& /*dependencies*/) {
-    throw mkl::unimplemented(
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& /*desc*/,
+                                           scalar<descriptor_type>* /*inout_re*/,
+                                           scalar<descriptor_type>* /*inout_im*/,
+                                           const std::vector<sycl::event>& /*dependencies*/) {
+    throw math::unimplemented(
         "DFT", "compute_forward",
         "MKLGPU does not support compute_forward(desc, inout_re, inout_im, dependencies).");
 }
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
-                                          bwd<descriptor_type>* out,
-                                          const std::vector<sycl::event>& dependencies) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
+                                           bwd<descriptor_type>* out,
+                                           const std::vector<sycl::event>& dependencies) {
     detail::expect_config<dft::detail::config_param::PLACEMENT,
                           dft::detail::config_value::NOT_INPLACE>(desc,
                                                                   "Unexpected value for placement");
@@ -166,15 +168,16 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* /*in_re*/,
-                                          scalar<descriptor_type>* /*in_im*/,
-                                          scalar<descriptor_type>* /*out_re*/,
-                                          scalar<descriptor_type>* /*out_im*/,
-                                          const std::vector<sycl::event>& /*dependencies*/) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc,
+                                           scalar<descriptor_type>* /*in_re*/,
+                                           scalar<descriptor_type>* /*in_im*/,
+                                           scalar<descriptor_type>* /*out_re*/,
+                                           scalar<descriptor_type>* /*out_im*/,
+                                           const std::vector<sycl::event>& /*dependencies*/) {
     detail::expect_config<dft::detail::config_param::COMPLEX_STORAGE,
                           dft::detail::config_value::REAL_REAL>(
         desc, "Unexpected value for complex storage");
-    throw oneapi::mkl::unimplemented(
+    throw oneapi::math::unimplemented(
         "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im, dependencies)",
         "MKLGPU does not support out-of-place FFT with real-real complex storage.");
 }
@@ -182,4 +185,4 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
 // Template function instantiations
 #include "dft/backends/backend_forward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::mklgpu
+} // namespace oneapi::math::dft::mklgpu
diff --git a/src/dft/backends/mklgpu/mkl_dft_gpu_wrappers.cpp b/src/dft/backends/mklgpu/mkl_dft_gpu_wrappers.cpp
index 8d2fa111d..a6b50fe39 100644
--- a/src/dft/backends/mklgpu/mkl_dft_gpu_wrappers.cpp
+++ b/src/dft/backends/mklgpu/mkl_dft_gpu_wrappers.cpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/mklgpu/onemkl_dft_mklgpu.hpp"
+#include "oneapi/math/dft/detail/mklgpu/onemath_dft_mklgpu.hpp"
 #include "dft/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         mklgpu
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
diff --git a/src/dft/backends/mklgpu/mklgpu_helpers.hpp b/src/dft/backends/mklgpu/mklgpu_helpers.hpp
index 4b93d1423..3be413637 100644
--- a/src/dft/backends/mklgpu/mklgpu_helpers.hpp
+++ b/src/dft/backends/mklgpu/mklgpu_helpers.hpp
@@ -17,50 +17,50 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_MKLGPU_HELPERS_HPP_
-#define _ONEMKL_DFT_SRC_MKLGPU_HELPERS_HPP_
+#ifndef _ONEMATH_DFT_SRC_MKLGPU_HELPERS_HPP_
+#define _ONEMATH_DFT_SRC_MKLGPU_HELPERS_HPP_
 
-#include "oneapi/mkl/detail/exceptions.hpp"
-#include "oneapi/mkl/dft/detail/types_impl.hpp"
+#include "oneapi/math/detail/exceptions.hpp"
+#include "oneapi/math/dft/detail/types_impl.hpp"
 
-#include "mkl_version.h"
-// MKLGPU header
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
 #if INTEL_MKL_VERSION < 20250000
-#include <oneapi/mkl/dfti.hpp>
+#include <mkl/dfti.hpp>
 #else
-#include <oneapi/mkl/dft.hpp>
+#include <mkl/dft.hpp>
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace mklgpu {
 namespace detail {
 
 /// Convert domain to equivalent backend native value.
-inline constexpr dft::domain to_mklgpu(dft::detail::domain dom) {
+inline constexpr oneapi::mkl::dft::domain to_mklgpu(dft::detail::domain dom) {
     if (dom == dft::detail::domain::REAL) {
-        return dft::domain::REAL;
+        return oneapi::mkl::dft::domain::REAL;
     }
     else {
-        return dft::domain::COMPLEX;
+        return oneapi::mkl::dft::domain::COMPLEX;
     }
 }
 
 /// Convert precision to equivalent backend native value.
-inline constexpr dft::precision to_mklgpu(dft::detail::precision dom) {
+inline constexpr oneapi::mkl::dft::precision to_mklgpu(dft::detail::precision dom) {
     if (dom == dft::detail::precision::SINGLE) {
-        return dft::precision::SINGLE;
+        return oneapi::mkl::dft::precision::SINGLE;
     }
     else {
-        return dft::precision::DOUBLE;
+        return oneapi::mkl::dft::precision::DOUBLE;
     }
 }
 
 /// Convert a config_param to equivalent backend native value.
-inline constexpr dft::config_param to_mklgpu(dft::detail::config_param param) {
+inline constexpr oneapi::mkl::dft::config_param to_mklgpu(dft::detail::config_param param) {
     using iparam = dft::detail::config_param;
-    using oparam = dft::config_param;
+    using oparam = oneapi::mkl::dft::config_param;
     switch (param) {
         case iparam::FORWARD_DOMAIN: return oparam::FORWARD_DOMAIN;
         case iparam::DIMENSION: return oparam::DIMENSION;
@@ -78,8 +78,8 @@ inline constexpr dft::config_param to_mklgpu(dft::detail::config_param param) {
         case iparam::WORKSPACE_EXTERNAL_BYTES: return oparam::WORKSPACE_BYTES;
         case iparam::COMMIT_STATUS: return oparam::COMMIT_STATUS;
         default:
-            throw mkl::invalid_argument("dft", "MKLGPU descriptor set_value()",
-                                        "Invalid config param.");
+            throw math::invalid_argument("dft", "MKLGPU descriptor set_value()",
+                                         "Invalid config param.");
             return static_cast<oparam>(0);
     }
 }
@@ -98,8 +98,8 @@ inline constexpr int to_mklgpu<dft::detail::config_param::COMPLEX_STORAGE>(
         return DFTI_COMPLEX_COMPLEX;
     }
     else {
-        throw mkl::unimplemented("dft", "MKLGPU descriptor set_value()",
-                                 "MKLGPU only supports complex-complex for complex storage.");
+        throw math::unimplemented("dft", "MKLGPU descriptor set_value()",
+                                  "MKLGPU only supports complex-complex for complex storage.");
         return 0;
     }
 }
@@ -111,8 +111,8 @@ inline constexpr int to_mklgpu<dft::detail::config_param::CONJUGATE_EVEN_STORAGE
         return DFTI_COMPLEX_COMPLEX;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLGPU descriptor set_value()",
-                                    "Invalid config value for conjugate even storage.");
+        throw math::invalid_argument("dft", "MKLGPU descriptor set_value()",
+                                     "Invalid config value for conjugate even storage.");
         return 0;
     }
 }
@@ -127,8 +127,8 @@ inline constexpr int to_mklgpu<dft::detail::config_param::PLACEMENT>(
         return DFTI_NOT_INPLACE;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLGPU descriptor set_value()",
-                                    "Invalid config value for inplace.");
+        throw math::invalid_argument("dft", "MKLGPU descriptor set_value()",
+                                     "Invalid config value for inplace.");
         return 0;
     }
 }
@@ -140,8 +140,8 @@ inline constexpr int to_mklgpu<dft::detail::config_param::PACKED_FORMAT>(
         return DFTI_CCE_FORMAT;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLGPU descriptor set_value()",
-                                    "Invalid config value for packed format.");
+        throw math::invalid_argument("dft", "MKLGPU descriptor set_value()",
+                                     "Invalid config value for packed format.");
         return 0;
     }
 }
@@ -151,29 +151,30 @@ inline constexpr int to_mklgpu<dft::detail::config_param::PACKED_FORMAT>(
  * @param value The config value to convert.
 **/
 template <dft::detail::config_param Param>
-inline constexpr dft::config_value to_mklgpu_config_value(dft::detail::config_value value);
+inline constexpr oneapi::mkl::dft::config_value to_mklgpu_config_value(
+    dft::detail::config_value value);
 
 template <>
-inline constexpr dft::config_value
+inline constexpr oneapi::mkl::dft::config_value
 to_mklgpu_config_value<dft::detail::config_param::WORKSPACE_PLACEMENT>(
     dft::detail::config_value value) {
     if (value == dft::detail::config_value::WORKSPACE_AUTOMATIC) {
-        // NB: dft::config_value != dft::detail::config_value
-        return dft::config_value::WORKSPACE_INTERNAL;
+        // NB: oneapi::mkl::dft::config_value != dft::detail::config_value
+        return oneapi::mkl::dft::config_value::WORKSPACE_INTERNAL;
     }
     else if (value == dft::detail::config_value::WORKSPACE_EXTERNAL) {
-        return dft::config_value::WORKSPACE_EXTERNAL;
+        return oneapi::mkl::dft::config_value::WORKSPACE_EXTERNAL;
     }
     else {
-        throw mkl::invalid_argument("dft", "MKLGPU descriptor set_value()",
-                                    "Invalid config value for workspace placement.");
-        return dft::config_value::WORKSPACE_INTERNAL;
+        throw math::invalid_argument("dft", "MKLGPU descriptor set_value()",
+                                     "Invalid config value for workspace placement.");
+        return oneapi::mkl::dft::config_value::WORKSPACE_INTERNAL;
     }
 }
 } // namespace detail
 } // namespace mklgpu
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _ONEMKL_DFT_SRC_MKLGPU_HELPERS_HPP_
+#endif // _ONEMATH_DFT_SRC_MKLGPU_HELPERS_HPP_
diff --git a/src/dft/backends/portfft/CMakeLists.txt b/src/dft/backends/portfft/CMakeLists.txt
index 752fae8d5..72ec70e24 100644
--- a/src/dft/backends/portfft/CMakeLists.txt
+++ b/src/dft/backends/portfft/CMakeLists.txt
@@ -54,20 +54,21 @@ if (IS_DPCPP AND UNIX AND NOT FOUND_TARGETS)
   list(APPEND TARGETS_COMPILE_OPTIONS -fsycl-targets=${TARGETS_TRIPLES})
   list(APPEND TARGETS_LINK_OPTIONS -fsycl-targets=${TARGETS_TRIPLES})
 
-  target_compile_options(ONEMKL::SYCL::SYCL INTERFACE ${TARGETS_COMPILE_OPTIONS})
-  target_link_options(ONEMKL::SYCL::SYCL INTERFACE ${TARGETS_LINK_OPTIONS})
+  target_compile_options(ONEMATH::SYCL::SYCL INTERFACE ${TARGETS_COMPILE_OPTIONS})
+  target_link_options(ONEMATH::SYCL::SYCL INTERFACE ${TARGETS_LINK_OPTIONS})
 endif()
 
-set(LIB_NAME onemkl_dft_portfft)
+set(LIB_NAME onemath_dft_portfft)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   descriptor.cpp
   commit.cpp
-  $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_portfft_wrappers.cpp>
+  $<$<BOOL:${BUILD_SHARED_LIBS}>: portfft_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_dft ${LIB_NAME})
+add_dependencies(onemath_backend_libs_dft ${LIB_NAME})
 
 find_package(portfft QUIET)
 if (NOT portfft_FOUND)
@@ -88,32 +89,32 @@ else()
 	target_link_libraries(${LIB_OBJ} PRIVATE portfft::portfft)
 endif()
 
-target_link_libraries(${LIB_OBJ} PRIVATE onemkl_warnings)
+target_link_libraries(${LIB_OBJ} PRIVATE onemath_warnings)
 
 target_include_directories(${LIB_OBJ}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_NAME}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -126,8 +127,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/dft/backends/portfft/commit.cpp b/src/dft/backends/portfft/commit.cpp
index a2c80e91a..628f93d02 100644
--- a/src/dft/backends/portfft/commit.cpp
+++ b/src/dft/backends/portfft/commit.cpp
@@ -28,12 +28,12 @@
 
 #include <portfft/portfft.hpp>
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "../stride_helper.hpp"
 
@@ -42,7 +42,7 @@
 // alias to avoid ambiguity
 namespace pfft = portfft;
 
-namespace oneapi::mkl::dft::portfft {
+namespace oneapi::math::dft::portfft {
 namespace detail {
 
 template <dft::precision prec, dft::domain dom>
@@ -61,11 +61,11 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
 
 public:
     portfft_commit(sycl::queue& queue, const dft::detail::dft_values<prec, dom>& config_values)
-            : oneapi::mkl::dft::detail::commit_impl<prec, dom>(queue, backend::portfft,
-                                                               config_values) {
+            : oneapi::math::dft::detail::commit_impl<prec, dom>(queue, backend::portfft,
+                                                                config_values) {
         if constexpr (prec == dft::detail::precision::DOUBLE) {
             if (!queue.get_device().has(sycl::aspect::fp64)) {
-                throw mkl::exception("DFT", "commit", "Device does not support double precision.");
+                throw math::exception("DFT", "commit", "Device does not support double precision.");
             }
         }
     }
@@ -73,20 +73,20 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
     void commit(const dft::detail::dft_values<prec, dom>& config_values) override {
         // not available in portFFT:
         this->external_workspace_helper_ =
-            oneapi::mkl::dft::detail::external_workspace_helper<prec, dom>(
+            oneapi::math::dft::detail::external_workspace_helper<prec, dom>(
                 config_values.workspace_placement ==
-                oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL);
+                oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL);
         if (config_values.workspace != config_value::ALLOW) {
-            throw mkl::unimplemented("dft/backends/portfft", __FUNCTION__,
-                                     "portFFT only supports ALLOW for the WORKSPACE parameter");
+            throw math::unimplemented("dft/backends/portfft", __FUNCTION__,
+                                      "portFFT only supports ALLOW for the WORKSPACE parameter");
         }
         if (config_values.ordering != config_value::ORDERED) {
-            throw mkl::unimplemented("dft/backends/portfft", __FUNCTION__,
-                                     "portFFT only supports ORDERED for the ORDERING parameter");
+            throw math::unimplemented("dft/backends/portfft", __FUNCTION__,
+                                      "portFFT only supports ORDERED for the ORDERING parameter");
         }
         if (config_values.transpose) {
-            throw mkl::unimplemented("dft/backends/portfft", __FUNCTION__,
-                                     "portFFT does not supported transposed output");
+            throw math::unimplemented("dft/backends/portfft", __FUNCTION__,
+                                      "portFFT does not supported transposed output");
         }
 
         auto stride_api_choice = dft::detail::get_stride_api(config_values);
@@ -140,7 +140,7 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
             committed_descriptors[1] = bwd_desc.commit(q);
         }
         catch (const pfft::unsupported_configuration& e) {
-            throw oneapi::mkl::unimplemented("dft/backends/portfft", __FUNCTION__, e.what());
+            throw oneapi::math::unimplemented("dft/backends/portfft", __FUNCTION__, e.what());
         }
     }
 
@@ -181,15 +181,15 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
                        sycl::buffer<scalar_type, 1>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
             "compute_forward");
-        throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)",
+                                          "portFFT does not support real-real complex storage.");
     }
     sycl::event forward_ip_rr(descriptor_type& desc, scalar_type*, scalar_type*,
                               const std::vector<sycl::event>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_forward");
-        throw oneapi::mkl::unimplemented("DFT",
-                                         "compute_forward(desc, inout_re, inout_im, dependencies)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented("DFT",
+                                          "compute_forward(desc, inout_re, inout_im, dependencies)",
+                                          "portFFT does not support real-real complex storage.");
     }
 
     // forward out-of-place COMPLEX_COMPLEX
@@ -222,14 +222,14 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
                        sycl::buffer<scalar_type, 1>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
             "compute_forward");
-        throw oneapi::mkl::unimplemented("DFT",
-                                         "compute_forward(desc, in_re, in_im, out_re, out_im)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented("DFT",
+                                          "compute_forward(desc, in_re, in_im, out_re, out_im)",
+                                          "portFFT does not support real-real complex storage.");
     }
     sycl::event forward_op_rr(descriptor_type& desc, scalar_type*, scalar_type*, scalar_type*,
                               scalar_type*, const std::vector<sycl::event>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>("compute_forward");
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im, dependencies)",
             "portFFT does not support real-real complex storage.");
     }
@@ -262,16 +262,16 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
                         sycl::buffer<scalar_type, 1>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
             "compute_backward");
-        throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)",
+                                          "portFFT does not support real-real complex storage.");
     }
     sycl::event backward_ip_rr(descriptor_type& desc, scalar_type*, scalar_type*,
                                const std::vector<sycl::event>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>(
             "compute_backward");
-        throw oneapi::mkl::unimplemented("DFT",
-                                         "compute_backward(desc, inout_re, inout_im, dependencies)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented(
+            "DFT", "compute_backward(desc, inout_re, inout_im, dependencies)",
+            "portFFT does not support real-real complex storage.");
     }
 
     // backward out-of-place COMPLEX_COMPLEX
@@ -304,15 +304,15 @@ class portfft_commit final : public dft::detail::commit_impl<prec, dom> {
                         sycl::buffer<scalar_type, 1>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<sycl::buffer<scalar_type, 1>>(
             "compute_backward");
-        throw oneapi::mkl::unimplemented("DFT",
-                                         "compute_backward(desc, in_re, in_im, out_re, out_im)",
-                                         "portFFT does not support real-real complex storage.");
+        throw oneapi::math::unimplemented("DFT",
+                                          "compute_backward(desc, in_re, in_im, out_re, out_im)",
+                                          "portFFT does not support real-real complex storage.");
     }
     sycl::event backward_op_rr(descriptor_type& desc, scalar_type*, scalar_type*, scalar_type*,
                                scalar_type*, const std::vector<sycl::event>&) override {
         dft::detail::get_commit(desc)->template compute_call_throw<scalar_type*>(
             "compute_backward");
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", "compute_backward(desc, in_re, in_im, out_re, out_im, deps)",
             "portFFT does not support real-real complex storage.");
     }
@@ -342,4 +342,4 @@ create_commit(
     const dft::detail::descriptor<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>&,
     sycl::queue&);
 
-} // namespace oneapi::mkl::dft::portfft
+} // namespace oneapi::math::dft::portfft
diff --git a/src/dft/backends/portfft/descriptor.cpp b/src/dft/backends/portfft/descriptor.cpp
index c45b9f2c5..9308bca93 100644
--- a/src/dft/backends/portfft/descriptor.cpp
+++ b/src/dft/backends/portfft/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 #include "../../descriptor.cxx"
 
-#include "oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp"
+#include "oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(backend_selector<backend::portfft> selector) {
@@ -44,4 +44,4 @@ template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(
     backend_selector<backend::portfft>);
 
-} // namespace oneapi::mkl::dft::detail
+} // namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/portfft/portfft_helper.hpp b/src/dft/backends/portfft/portfft_helper.hpp
index 010f2a5e6..9900d6feb 100644
--- a/src/dft/backends/portfft/portfft_helper.hpp
+++ b/src/dft/backends/portfft/portfft_helper.hpp
@@ -17,26 +17,26 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_PORTFFT_HELPERS_HPP_
-#define _ONEMKL_DFT_SRC_PORTFFT_HELPERS_HPP_
+#ifndef _ONEMATH_DFT_SRC_PORTFFT_HELPERS_HPP_
+#define _ONEMATH_DFT_SRC_PORTFFT_HELPERS_HPP_
 
 #include <type_traits>
 
 #include <portfft/portfft.hpp>
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
 
 namespace pfft = portfft;
 
-namespace oneapi::mkl::dft::portfft::detail {
+namespace oneapi::math::dft::portfft::detail {
 template <dft::precision prec, dft::domain dom>
 inline dft::detail::commit_impl<prec, dom>* checked_get_commit(
     dft::detail::descriptor<prec, dom>& desc) {
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::portfft) {
-        throw mkl::invalid_argument("dft/backends/portfft", "get_commit",
-                                    "DFT descriptor has not been commited for portFFT");
+        throw math::invalid_argument("dft/backends/portfft", "get_commit",
+                                     "DFT descriptor has not been commited for portFFT");
     }
     return commit_handle;
 }
@@ -57,6 +57,6 @@ auto get_descriptors(descriptor_type& desc) {
     auto commit = detail::checked_get_commit(desc);
     return reinterpret_cast<storage_type<descriptor_type>*>(commit->get_handle());
 }
-} // namespace oneapi::mkl::dft::portfft::detail
+} // namespace oneapi::math::dft::portfft::detail
 
 #endif
diff --git a/src/dft/backends/portfft/mkl_dft_portfft_wrappers.cpp b/src/dft/backends/portfft/portfft_wrappers.cpp
similarity index 89%
rename from src/dft/backends/portfft/mkl_dft_portfft_wrappers.cpp
rename to src/dft/backends/portfft/portfft_wrappers.cpp
index 28996b0a1..a628e3e25 100644
--- a/src/dft/backends/portfft/mkl_dft_portfft_wrappers.cpp
+++ b/src/dft/backends/portfft/portfft_wrappers.cpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/portfft/onemkl_dft_portfft.hpp"
+#include "oneapi/math/dft/detail/portfft/onemath_dft_portfft.hpp"
 #include "dft/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         portfft
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
diff --git a/src/dft/backends/rocfft/CMakeLists.txt b/src/dft/backends/rocfft/CMakeLists.txt
index 1380c8f0a..b234f70e3 100644
--- a/src/dft/backends/rocfft/CMakeLists.txt
+++ b/src/dft/backends/rocfft/CMakeLists.txt
@@ -17,33 +17,34 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_dft_rocfft)
+set(LIB_NAME onemath_dft_rocfft)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   descriptor.cpp
   commit.cpp
   forward.cpp
   backward.cpp
-  $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_rocfft_wrappers.cpp>
+  $<$<BOOL:${BUILD_SHARED_LIBS}>: rocfft_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_dft ${LIB_NAME})
+add_dependencies(onemath_backend_libs_dft ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_NAME}
-  PUBLIC ${ONEMKL_INTERFACE_INCLUDE_DIRS}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
 )
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 find_package(HIP REQUIRED)
 # Require the minimum rocFFT version matching with ROCm 5.4.3.
@@ -64,17 +65,17 @@ find_path(
 )
 target_include_directories(${LIB_OBJ} PRIVATE ${rocfft_EXTRA_INCLUDE_DIR})
 
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-# Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -87,8 +88,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/dft/backends/rocfft/backward.cpp b/src/dft/backends/rocfft/backward.cpp
index d6973bfb1..745ff666d 100644
--- a/src/dft/backends/rocfft/backward.cpp
+++ b/src/dft/backends/rocfft/backward.cpp
@@ -23,10 +23,10 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp"
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
 #include "execute_helper.hpp"
 #include "../../execute_helper_generic.hpp"
@@ -35,7 +35,7 @@
 #include <rocfft.h>
 #include <hip/hip_runtime_api.h>
 
-namespace oneapi::mkl::dft::rocfft {
+namespace oneapi::math::dft::rocfft {
 namespace detail {
 //forward declaration
 template <dft::precision prec, dft::domain dom>
@@ -55,8 +55,8 @@ rocfft_execution_info get_bwd_info(dft::detail::commit_impl<prec, dom>* commit)
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     const std::string func_name = "compute_backward(desc, inout)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -70,7 +70,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
         offsets[0] *= 2; // offset is supplied in complex but we offset scalar pointer
     }
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -92,9 +92,9 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
     const std::string func_name = "compute_backward(desc, inout_re, inout_im)";
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
@@ -103,7 +103,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
     auto offsets = detail::get_offsets_bwd(commit);
 
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -131,9 +131,9 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<bwd<descriptor_type>, 1>& in,
-                                    sycl::buffer<fwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<bwd<descriptor_type>, 1>& in,
+                                     sycl::buffer<fwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
     auto commit = detail::checked_get_commit(desc);
@@ -164,11 +164,11 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
+ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                     sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
     auto plan = detail::get_bwd_plan(commit);
@@ -212,8 +212,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                           const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                            const std::vector<sycl::event>& deps) {
     const std::string func_name = "compute_backward(desc, inout, deps)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -227,7 +227,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
         offsets[0] *= 2; // offset is supplied in complex but we offset scalar pointer
     }
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -250,9 +250,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd<descriptor
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                           scalar<descriptor_type>* inout_im,
-                                           const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc,
+                                            scalar<descriptor_type>* inout_re,
+                                            scalar<descriptor_type>* inout_im,
+                                            const std::vector<sycl::event>& deps) {
     const std::string func_name = "compute_backward(desc, inout_re, inout_im, deps)";
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
@@ -261,7 +262,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
     auto offsets = detail::get_offsets_bwd(commit);
 
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -283,9 +284,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
-                                           fwd<descriptor_type>* out,
-                                           const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor_type>* in,
+                                            fwd<descriptor_type>* out,
+                                            const std::vector<sycl::event>& deps) {
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
     auto commit = detail::checked_get_commit(desc);
@@ -316,11 +317,11 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd<descriptor
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                           scalar<descriptor_type>* in_im,
-                                           scalar<descriptor_type>* out_re,
-                                           scalar<descriptor_type>* out_im,
-                                           const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                            scalar<descriptor_type>* in_im,
+                                            scalar<descriptor_type>* out_re,
+                                            scalar<descriptor_type>* out_im,
+                                            const std::vector<sycl::event>& deps) {
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
     auto plan = detail::get_bwd_plan(commit);
@@ -349,4 +350,4 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar<descrip
 // Template function instantiations
 #include "dft/backends/backend_backward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::rocfft
+} // namespace oneapi::math::dft::rocfft
diff --git a/src/dft/backends/rocfft/commit.cpp b/src/dft/backends/rocfft/commit.cpp
index 991b0c471..4c5d51d2f 100644
--- a/src/dft/backends/rocfft/commit.cpp
+++ b/src/dft/backends/rocfft/commit.cpp
@@ -27,12 +27,12 @@
 #include <algorithm>
 #include <optional>
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp"
-#include "oneapi/mkl/dft/types.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
+#include "oneapi/math/dft/types.hpp"
 
 #include "../stride_helper.hpp"
 
@@ -42,7 +42,7 @@
 #include <rocfft-version.h>
 #include <hip/hip_runtime_api.h>
 
-namespace oneapi::mkl::dft::rocfft {
+namespace oneapi::math::dft::rocfft {
 namespace detail {
 
 // rocfft has global setup and cleanup functions which use some global state internally.
@@ -55,7 +55,7 @@ class rocfft_singleton {
     rocfft_singleton() {
         const auto result = rocfft_setup();
         if (result != rocfft_status_success) {
-            throw mkl::exception(
+            throw math::exception(
                 "DFT", "rocfft",
                 "Failed to setup rocfft. returned status " + std::to_string(result));
         }
@@ -85,7 +85,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     using scalar_type = typename dft::detail::commit_impl<prec, dom>::scalar_type;
     // For real to complex transforms, the "transform_type" arg also encodes the direction (e.g. rocfft_transform_type_*_forward vs rocfft_transform_type_*_backward)
     // in the plan so we must have one for each direction.
-    // We also need this because oneMKL uses a directionless "FWD_DISTANCE" and "BWD_DISTANCE" while rocFFT uses a directional "in_distance" and "out_distance".
+    // We also need this because oneMath uses a directionless "FWD_DISTANCE" and "BWD_DISTANCE" while rocFFT uses a directional "in_distance" and "out_distance".
     // The same is also true for "FORWARD_SCALE" and "BACKWARD_SCALE".
     // handles[0] is forward, handles[1] is backward
     std::array<rocfft_handle, 2> handles{};
@@ -93,11 +93,11 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
 
 public:
     rocfft_commit(sycl::queue& queue, const dft::detail::dft_values<prec, dom>& config_values)
-            : oneapi::mkl::dft::detail::commit_impl<prec, dom>(queue, backend::rocfft,
-                                                               config_values) {
+            : oneapi::math::dft::detail::commit_impl<prec, dom>(queue, backend::rocfft,
+                                                                config_values) {
         if constexpr (prec == dft::detail::precision::DOUBLE) {
             if (!queue.get_device().has(sycl::aspect::fp64)) {
-                throw mkl::exception("DFT", "commit", "Device does not support double precision.");
+                throw math::exception("DFT", "commit", "Device does not support double precision.");
             }
         }
         // initialise the rocFFT global state
@@ -107,30 +107,30 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     void clean_plans() {
         if (handles[0].plan) {
             if (rocfft_plan_destroy(handles[0].plan.value()) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to destroy forward plan.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to destroy forward plan.");
             }
             handles[0].plan = std::nullopt;
         }
         if (handles[1].plan) {
             if (rocfft_plan_destroy(handles[1].plan.value()) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to destroy backward plan.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to destroy backward plan.");
             }
             handles[1].plan = std::nullopt;
         }
 
         if (handles[0].info) {
             if (rocfft_execution_info_destroy(handles[0].info.value()) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to destroy forward execution info .");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to destroy forward execution info .");
             }
             handles[0].info = std::nullopt;
         }
         if (handles[1].info) {
             if (rocfft_execution_info_destroy(handles[1].info.value()) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to destroy backward execution info .");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to destroy backward execution info .");
             }
             handles[1].info = std::nullopt;
         }
@@ -141,9 +141,9 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     void commit(const dft::detail::dft_values<prec, dom>& config_values) override {
         // this could be a recommit
         this->external_workspace_helper_ =
-            oneapi::mkl::dft::detail::external_workspace_helper<prec, dom>(
+            oneapi::math::dft::detail::external_workspace_helper<prec, dom>(
                 config_values.workspace_placement ==
-                oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL);
+                oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL);
         clean_plans();
 
         const rocfft_result_placement placement =
@@ -180,7 +180,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
 
         constexpr std::size_t max_supported_dims = 3;
         std::array<std::size_t, max_supported_dims> lengths;
-        // rocfft does dimensions in the reverse order to oneMKL
+        // rocfft does dimensions in the reverse order to oneMath
         std::copy(config_values.dimensions.crbegin(), config_values.dimensions.crend(),
                   lengths.data());
 
@@ -214,7 +214,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
             }
             else {
                 if (config_values.conj_even_storage != dft::config_value::COMPLEX_COMPLEX) {
-                    throw mkl::exception(
+                    throw math::exception(
                         "dft/backends/rocfft", __FUNCTION__,
                         "only COMPLEX_COMPLEX conjugate_even_storage is supported");
                 }
@@ -243,7 +243,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                     if (strides[i] > strides[j] && strides[i] % cplx_dim != 0 &&
                         strides[i] % real_dim != 0) {
                         // rocfft does not throw, it just produces wrong results
-                        throw oneapi::mkl::unimplemented(
+                        throw oneapi::math::unimplemented(
                             "DFT", func,
                             "rocfft requires a stride to be divisible by all dimensions associated with smaller strides!");
                     }
@@ -271,7 +271,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
             if (dom == dft::domain::COMPLEX &&
                 config_values.placement == dft::config_value::NOT_INPLACE && dimensions > 2) {
                 if (stride_vecs.vec_a != stride_vecs.vec_b)
-                    throw oneapi::mkl::unimplemented(
+                    throw oneapi::math::unimplemented(
                         "DFT", func,
                         "due to a bug in rocfft version in use, it requires fwd and bwd stride to be the same for COMPLEX out_of_place computations");
             }
@@ -279,18 +279,18 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
 
         rocfft_plan_description plan_desc_fwd, plan_desc_bwd; // Can't reuse with ROCm 6 due to bug.
         if (rocfft_plan_description_create(&plan_desc_fwd) != rocfft_status_success) {
-            throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                 "Failed to create plan description.");
+            throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                  "Failed to create plan description.");
         }
         if (rocfft_plan_description_create(&plan_desc_bwd) != rocfft_status_success) {
-            throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                 "Failed to create plan description.");
+            throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                  "Failed to create plan description.");
         }
         // plan_description can be destroyed afted plan_create
         auto description_destroy = [](rocfft_plan_description p) {
             if (rocfft_plan_description_destroy(p) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to destroy plan description.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to destroy plan description.");
             }
         };
         std::unique_ptr<rocfft_plan_description_t, decltype(description_destroy)>
@@ -342,7 +342,7 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                               (vec_b_valid_as_bwd_domain && vec_a_valid_as_fwd_domain);
 
         if (!valid_forward && !valid_backward) {
-            throw mkl::exception("dft/backends/cufft", __FUNCTION__, "Invalid strides.");
+            throw math::exception("dft/backends/cufft", __FUNCTION__, "Invalid strides.");
         }
 
         if (valid_forward) {
@@ -358,14 +358,14 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                                                         bwd_distance // out distance
                 );
             if (res != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to set forward data layout.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to set forward data layout.");
             }
 
             if (rocfft_plan_description_set_scale_factor(plan_desc_fwd, config_values.fwd_scale) !=
                 rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to set forward scale factor.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to set forward scale factor.");
             }
 
             rocfft_plan fwd_plan;
@@ -373,16 +373,16 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                                      lengths.data(), number_of_transforms, plan_desc_fwd);
 
             if (res != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to create forward plan.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to create forward plan.");
             }
 
             handles[0].plan = fwd_plan;
 
             rocfft_execution_info fwd_info;
             if (rocfft_execution_info_create(&fwd_info) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to create forward execution info.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to create forward execution info.");
             }
             handles[0].info = fwd_info;
 
@@ -391,8 +391,8 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                 if (work_buf_size != 0) {
                     void* work_buf;
                     if (hipMalloc(&work_buf, work_buf_size) != hipSuccess) {
-                        throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                             "Failed to get allocate forward work buffer.");
+                        throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                              "Failed to get allocate forward work buffer.");
                     }
                     set_workspace_impl(handles[0], reinterpret_cast<scalar_type*>(work_buf),
                                        work_buf_size, "commit");
@@ -414,29 +414,29 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                                                         fwd_distance // out distance
                 );
             if (res != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to set backward data layout.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to set backward data layout.");
             }
 
             if (rocfft_plan_description_set_scale_factor(plan_desc_bwd, config_values.bwd_scale) !=
                 rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to set backward scale factor.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to set backward scale factor.");
             }
 
             rocfft_plan bwd_plan;
             res = rocfft_plan_create(&bwd_plan, placement, bwd_type, precision, dimensions,
                                      lengths.data(), number_of_transforms, plan_desc_bwd);
             if (res != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to create backward rocFFT plan.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to create backward rocFFT plan.");
             }
             handles[1].plan = bwd_plan;
 
             rocfft_execution_info bwd_info;
             if (rocfft_execution_info_create(&bwd_info) != rocfft_status_success) {
-                throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                     "Failed to create backward execution info.");
+                throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                      "Failed to create backward execution info.");
             }
             handles[1].info = bwd_info;
 
@@ -445,8 +445,8 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
                 if (work_buf_size != 0) {
                     void* work_buf;
                     if (hipMalloc(&work_buf, work_buf_size) != hipSuccess) {
-                        throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                             "Failed to get allocate backward work buffer.");
+                        throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                              "Failed to get allocate backward work buffer.");
                     }
                     set_workspace_impl(handles[1], reinterpret_cast<scalar_type*>(work_buf),
                                        work_buf_size, "commit");
@@ -484,12 +484,12 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     **/
     std::int64_t get_rocfft_workspace_bytes(rocfft_handle& handle, const char* function) {
         if (!handle.plan) {
-            throw mkl::exception("dft/backends/rocfft", function, "Missing internal rocfft plan");
+            throw math::exception("dft/backends/rocfft", function, "Missing internal rocfft plan");
         }
         std::size_t size = 0;
         if (rocfft_plan_get_work_buffer_size(*handle.plan, &size) != rocfft_status_success) {
-            throw mkl::exception("dft/backends/rocfft", function,
-                                 "Failed to get rocfft work buffer size.");
+            throw math::exception("dft/backends/rocfft", function,
+                                  "Failed to get rocfft work buffer size.");
         }
         return static_cast<std::int64_t>(size);
     }
@@ -505,32 +505,32 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     void set_workspace_impl(const rocfft_handle& handle, scalar_type* workspace,
                             std::int64_t workspace_bytes, const char* function) {
         if (!handle.info) {
-            throw mkl::exception(
+            throw math::exception(
                 "dft/backends/rocfft", function,
                 "Could not set rocFFT workspace - handle has no associated rocfft_info.");
         }
         if (handle.buffer) {
-            throw mkl::exception(
+            throw math::exception(
                 "dft/backends/rocfft", function,
                 "Could not set rocFFT workspace - an internal buffer is already set.");
         }
         if (workspace_bytes && workspace == nullptr) {
-            throw mkl::exception("dft/backends/rocfft", function, "Trying to nullptr workspace.");
+            throw math::exception("dft/backends/rocfft", function, "Trying to nullptr workspace.");
         }
         auto info = *handle.info;
         if (workspace_bytes &&
             rocfft_execution_info_set_work_buffer(info, static_cast<void*>(workspace),
                                                   static_cast<std::size_t>(workspace_bytes)) !=
                 rocfft_status_success) {
-            throw mkl::exception("dft/backends/rocfft", function, "Failed to set work buffer.");
+            throw math::exception("dft/backends/rocfft", function, "Failed to set work buffer.");
         }
     }
 
     void free_internal_workspace_if_rqd(rocfft_handle& handle, const char* function) {
         if (handle.buffer) {
             if (hipFree(*handle.buffer) != hipSuccess) {
-                throw mkl::exception("dft/backends/rocfft", function,
-                                     "Failed to free internal buffer.");
+                throw math::exception("dft/backends/rocfft", function,
+                                      "Failed to free internal buffer.");
             }
             handle.buffer = std::nullopt;
         }
@@ -583,13 +583,13 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
     std::int64_t get_plan_workspace_size_bytes(rocfft_plan_t* plan) {
         // plan work buffer
         if (plan == nullptr) {
-            throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                 "Missing internal rocFFT plan.");
+            throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                  "Missing internal rocFFT plan.");
         }
         std::size_t work_buf_size;
         if (rocfft_plan_get_work_buffer_size(plan, &work_buf_size) != rocfft_status_success) {
-            throw mkl::exception("dft/backends/rocfft", __FUNCTION__,
-                                 "Failed to get work buffer size.");
+            throw math::exception("dft/backends/rocfft", __FUNCTION__,
+                                  "Failed to get work buffer size.");
         }
         return static_cast<std::int64_t>(work_buf_size);
     }
@@ -668,4 +668,4 @@ get_offsets_bwd<dft::detail::precision::DOUBLE, dft::detail::domain::COMPLEX>(
 
 } //namespace detail
 
-} // namespace oneapi::mkl::dft::rocfft
+} // namespace oneapi::math::dft::rocfft
diff --git a/src/dft/backends/rocfft/descriptor.cpp b/src/dft/backends/rocfft/descriptor.cpp
index 22f21590a..d1bd88887 100644
--- a/src/dft/backends/rocfft/descriptor.cpp
+++ b/src/dft/backends/rocfft/descriptor.cpp
@@ -17,12 +17,12 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 #include "../../descriptor.cxx"
 
-#include "oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp"
+#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 template <precision prec, domain dom>
 void descriptor<prec, dom>::commit(backend_selector<backend::rocfft> selector) {
@@ -44,4 +44,4 @@ template void descriptor<precision::DOUBLE, domain::COMPLEX>::commit(
 template void descriptor<precision::DOUBLE, domain::REAL>::commit(
     backend_selector<backend::rocfft>);
 
-} //namespace oneapi::mkl::dft::detail
+} //namespace oneapi::math::dft::detail
diff --git a/src/dft/backends/rocfft/execute_helper.hpp b/src/dft/backends/rocfft/execute_helper.hpp
index 626b46a4c..c1ee6302b 100644
--- a/src/dft/backends/rocfft/execute_helper.hpp
+++ b/src/dft/backends/rocfft/execute_helper.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
-#define _ONEMKL_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
+#ifndef _ONEMATH_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
+#define _ONEMATH_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,35 +26,35 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/dft/detail/commit_impl.hpp"
-#include "oneapi/mkl/dft/detail/descriptor_impl.hpp"
-#include "oneapi/mkl/dft/types.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/dft/detail/commit_impl.hpp"
+#include "oneapi/math/dft/detail/descriptor_impl.hpp"
+#include "oneapi/math/dft/types.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 #include <hip/hip_runtime.h>
 #include <rocfft.h>
 
-namespace oneapi::mkl::dft::rocfft::detail {
+namespace oneapi::math::dft::rocfft::detail {
 
 template <dft::precision prec, dft::domain dom>
 inline dft::detail::commit_impl<prec, dom>* checked_get_commit(
     dft::detail::descriptor<prec, dom>& desc) {
     auto commit_handle = dft::detail::get_commit(desc);
     if (commit_handle == nullptr || commit_handle->get_backend() != backend::rocfft) {
-        throw mkl::invalid_argument("dft/backends/rocfft", "get_commit",
-                                    "DFT descriptor has not been commited for rocFFT");
+        throw math::invalid_argument("dft/backends/rocfft", "get_commit",
+                                     "DFT descriptor has not been commited for rocFFT");
     }
     return commit_handle;
 }
 
-/// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match
+/// Throw a math::invalid_argument if the runtime param in the descriptor does not match
 /// the expected value.
 template <dft::config_param Param, dft::config_value Expected, typename DescT>
 inline auto expect_config(DescT& desc, const char* message) {
     dft::config_value actual{ 0 };
     desc.get_value(Param, &actual);
     if (actual != Expected) {
-        throw mkl::invalid_argument("dft/backends/rocfft", "expect_config", message);
+        throw math::invalid_argument("dft/backends/rocfft", "expect_config", message);
     }
 }
 
@@ -68,7 +68,7 @@ inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& i
     auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_hip>();
     auto result = rocfft_execution_info_set_stream(info, stream);
     if (result != rocfft_status_success) {
-        throw oneapi::mkl::exception(
+        throw oneapi::math::exception(
             "dft/backends/rocfft", func,
             "rocfft_execution_info_set_stream returned " + std::to_string(result));
     }
@@ -78,8 +78,8 @@ inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& i
 inline void sync_checked(const std::string& func, hipStream_t stream) {
     auto result = hipStreamSynchronize(stream);
     if (result != hipSuccess) {
-        throw oneapi::mkl::exception("dft/backends/rocfft", func,
-                                     "hipStreamSynchronize returned " + std::to_string(result));
+        throw oneapi::math::exception("dft/backends/rocfft", func,
+                                      "hipStreamSynchronize returned " + std::to_string(result));
     }
 }
 
@@ -87,8 +87,8 @@ inline void execute_checked(const std::string& func, hipStream_t stream, const r
                             void* in_buffer[], void* out_buffer[], rocfft_execution_info info) {
     auto result = rocfft_execute(plan, in_buffer, out_buffer, info);
     if (result != rocfft_status_success) {
-        throw oneapi::mkl::exception("dft/backends/rocfft", func,
-                                     "rocfft_execute returned " + std::to_string(result));
+        throw oneapi::math::exception("dft/backends/rocfft", func,
+                                      "rocfft_execute returned " + std::to_string(result));
     }
 #ifndef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND
     // If not using equeue native extension, the host task must wait on the
@@ -98,6 +98,6 @@ inline void execute_checked(const std::string& func, hipStream_t stream, const r
 #endif
 }
 
-} // namespace oneapi::mkl::dft::rocfft::detail
+} // namespace oneapi::math::dft::rocfft::detail
 
-#endif // _ONEMKL_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
+#endif // _ONEMATH_DFT_SRC_EXECUTE_HELPER_ROCFFT_HPP_
diff --git a/src/dft/backends/rocfft/forward.cpp b/src/dft/backends/rocfft/forward.cpp
index e40469fe5..40f8834a3 100644
--- a/src/dft/backends/rocfft/forward.cpp
+++ b/src/dft/backends/rocfft/forward.cpp
@@ -24,10 +24,10 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-#include "oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp"
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
 #include "execute_helper.hpp"
 #include "../../execute_helper_generic.hpp"
@@ -36,7 +36,7 @@
 #include <rocfft.h>
 #include <hip/hip_runtime_api.h>
 
-namespace oneapi::mkl::dft::rocfft {
+namespace oneapi::math::dft::rocfft {
 
 namespace detail {
 //forward declaration
@@ -58,8 +58,8 @@ rocfft_execution_info get_fwd_info(dft::detail::commit_impl<prec, dom>* commit)
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<fwd<descriptor_type>, 1>& inout) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& inout) {
     const std::string func_name = "compute_forward(desc, inout)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -73,7 +73,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
         offsets[1] *= 2; // offset is supplied in complex but we offset scalar pointer
     }
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -95,9 +95,9 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& inout_im) {
     const std::string func_name = "compute_forward(desc, inout_re, inout_im)";
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
@@ -106,7 +106,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
     auto offsets = detail::get_offsets_fwd(commit);
 
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -134,8 +134,9 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descriptor_type>, 1>& in,
-                                   sycl::buffer<bwd<descriptor_type>, 1>& out) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<fwd<descriptor_type>, 1>& in,
+                                    sycl::buffer<bwd<descriptor_type>, 1>& out) {
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
     auto commit = detail::checked_get_commit(desc);
@@ -166,11 +167,11 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer<fwd<descr
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& in_im,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_re,
-                                   sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
+ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& in_im,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_re,
+                                    sycl::buffer<scalar<descriptor_type>, 1>& out_im) {
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
     auto plan = detail::get_fwd_plan(commit);
@@ -214,8 +215,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type& desc,
 
 //In-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
-                                          const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* inout,
+                                           const std::vector<sycl::event>& deps) {
     const std::string func_name = "compute_forward(desc, inout, deps)";
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::INPLACE>(
         desc, "Unexpected value for placement");
@@ -229,7 +230,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
         offsets[1] *= 2; // offset is supplied in complex but we offset scalar pointer
     }
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -252,9 +253,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
-                                          scalar<descriptor_type>* inout_im,
-                                          const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* inout_re,
+                                           scalar<descriptor_type>* inout_im,
+                                           const std::vector<sycl::event>& deps) {
     const std::string func_name = "compute_forward(desc, inout_re, inout_im, deps)";
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
@@ -263,7 +264,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
     auto offsets = detail::get_offsets_fwd(commit);
 
     if (offsets[0] != offsets[1]) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "DFT", func_name,
             "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!");
     }
@@ -284,9 +285,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
 
 //Out-of-place transform
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
-                                          bwd<descriptor_type>* out,
-                                          const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_type>* in,
+                                           bwd<descriptor_type>* out,
+                                           const std::vector<sycl::event>& deps) {
     detail::expect_config<dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE>(
         desc, "Unexpected value for placement");
     auto commit = detail::checked_get_commit(desc);
@@ -317,11 +318,11 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd<descriptor_
 
 //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format
 template <typename descriptor_type>
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
-                                          scalar<descriptor_type>* in_im,
-                                          scalar<descriptor_type>* out_re,
-                                          scalar<descriptor_type>* out_im,
-                                          const std::vector<sycl::event>& deps) {
+ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descriptor_type>* in_re,
+                                           scalar<descriptor_type>* in_im,
+                                           scalar<descriptor_type>* out_re,
+                                           scalar<descriptor_type>* out_im,
+                                           const std::vector<sycl::event>& deps) {
     auto commit = detail::checked_get_commit(desc);
     auto queue = commit->get_queue();
     auto plan = detail::get_fwd_plan(commit);
@@ -350,4 +351,4 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar<descript
 // Template function instantiations
 #include "dft/backends/backend_forward_instantiations.cxx"
 
-} // namespace oneapi::mkl::dft::rocfft
+} // namespace oneapi::math::dft::rocfft
diff --git a/src/dft/backends/rocfft/rocfft_handle.hpp b/src/dft/backends/rocfft/rocfft_handle.hpp
index ea4f44d68..04eca565d 100644
--- a/src/dft/backends/rocfft/rocfft_handle.hpp
+++ b/src/dft/backends/rocfft/rocfft_handle.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_ROCFFT_ROCFFT_HANDLE_HPP_
-#define _ONEMKL_DFT_SRC_ROCFFT_ROCFFT_HANDLE_HPP_
+#ifndef _ONEMATH_DFT_SRC_ROCFFT_ROCFFT_HANDLE_HPP_
+#define _ONEMATH_DFT_SRC_ROCFFT_ROCFFT_HANDLE_HPP_
 
 #include <optional>
 
diff --git a/src/dft/backends/rocfft/mkl_dft_rocfft_wrappers.cpp b/src/dft/backends/rocfft/rocfft_wrappers.cpp
similarity index 89%
rename from src/dft/backends/rocfft/mkl_dft_rocfft_wrappers.cpp
rename to src/dft/backends/rocfft/rocfft_wrappers.cpp
index c8f0e35c7..d2d198b78 100644
--- a/src/dft/backends/rocfft/mkl_dft_rocfft_wrappers.cpp
+++ b/src/dft/backends/rocfft/rocfft_wrappers.cpp
@@ -17,13 +17,13 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/rocfft/onemkl_dft_rocfft.hpp"
+#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
 #include "dft/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         rocfft
 
-extern "C" dft_function_table_t mkl_dft_table = {
+extern "C" dft_function_table_t onemath_dft_table = {
     WRAPPER_VERSION,
 #include "dft/backends/backend_wrappers.cxx"
 };
diff --git a/src/dft/backends/stride_helper.hpp b/src/dft/backends/stride_helper.hpp
index 6c3146c99..8375b472e 100644
--- a/src/dft/backends/stride_helper.hpp
+++ b/src/dft/backends/stride_helper.hpp
@@ -20,7 +20,7 @@
 #ifndef _DFT_DETAIL_STRIDE_HELPER_HPP_
 #define _DFT_DETAIL_STRIDE_HELPER_HPP_
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 enum class stride_api {
     INVALID, // Cannot choose: no valid choice
@@ -37,7 +37,7 @@ enum class stride_api {
 inline void throw_on_invalid_stride_api(const char* function,
                                         stride_api stride_choice = stride_api::INVALID) {
     if (stride_choice == stride_api::INVALID) {
-        throw mkl::invalid_argument(
+        throw math::invalid_argument(
             "DFT", function,
             "Invalid INPUT/OUTPUT or FWD/BACKWARD strides. API usage may have been mixed.");
     }
@@ -75,8 +75,8 @@ struct stride_vectors {
               bwd_in(stride_choice == stride_api::FB_STRIDES ? vec_b : vec_a),
               bwd_out(stride_choice == stride_api::FB_STRIDES ? vec_a : vec_b) {
         if (stride_choice == stride_api::INVALID) {
-            throw mkl::exception("DFT", "detail::stride_vector constructor",
-                                 "Internal error: invalid stride API");
+            throw math::exception("DFT", "detail::stride_vector constructor",
+                                  "Internal error: invalid stride API");
         }
         auto& v1 = stride_choice == stride_api::FB_STRIDES ? config_values.fwd_strides
                                                            : config_values.input_strides;
@@ -88,8 +88,8 @@ struct stride_vectors {
         for (std::size_t i{ 0 }; i < v1.size(); ++i) { // v1.size() == v2.size()
             if constexpr (std::is_unsigned_v<StrideElemT>) {
                 if (v1[i] < 0 || v2[i] < 0) {
-                    throw mkl::unimplemented("DFT", "commit",
-                                             "Backend does not support negative strides.");
+                    throw math::unimplemented("DFT", "commit",
+                                              "Backend does not support negative strides.");
                 }
             }
             vec_a[i] = static_cast<StrideElemT>(v1[i]);
@@ -146,6 +146,6 @@ inline stride_api get_stride_api(const ConfigT& config_values) {
     return stride_api::INVALID;
 }
 
-} // namespace oneapi::mkl::dft::detail
+} // namespace oneapi::math::dft::detail
 
 #endif //_DFT_DETAIL_STRIDE_HELPER_HPP_
diff --git a/src/dft/descriptor.cxx b/src/dft/descriptor.cxx
index aa0d9d70e..57ed1f086 100644
--- a/src/dft/descriptor.cxx
+++ b/src/dft/descriptor.cxx
@@ -18,13 +18,13 @@
 *******************************************************************************/
 #include <cstdarg>
 
-#include "oneapi/mkl/detail/exceptions.hpp"
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/detail/exceptions.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
 #include "dft/descriptor_config_helper.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 
@@ -58,10 +58,10 @@ void descriptor<prec, dom>::set_value(config_param param, ...) {
     va_start(vl, param);
     switch (param) {
         case config_param::FORWARD_DOMAIN:
-            throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+            throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
             break;
         case config_param::DIMENSION:
-            throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+            throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
             break;
         case config_param::LENGTHS: {
             if (values_.dimensions.size() == 1) {
@@ -74,7 +74,7 @@ void descriptor<prec, dom>::set_value(config_param param, ...) {
             break;
         }
         case config_param::PRECISION:
-            throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+            throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
             break;
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
@@ -136,15 +136,15 @@ void descriptor<prec, dom>::set_value(config_param param, ...) {
             detail::set_value<config_param::WORKSPACE_PLACEMENT>(values_, va_arg(vl, config_value));
             break;
         case config_param::WORKSPACE_EXTERNAL_BYTES:
-            throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+            throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
             break;
         case config_param::PACKED_FORMAT:
             detail::set_value<config_param::PACKED_FORMAT>(values_, va_arg(vl, config_value));
             break;
         case config_param::COMMIT_STATUS:
-            throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+            throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
             break;
-        default: throw mkl::invalid_argument("DFT", "set_value", "Invalid config_param argument.");
+        default: throw math::invalid_argument("DFT", "set_value", "Invalid config_param argument.");
     }
     va_end(vl);
 }
@@ -152,12 +152,12 @@ void descriptor<prec, dom>::set_value(config_param param, ...) {
 template <precision prec, domain dom>
 descriptor<prec, dom>::descriptor(std::vector<std::int64_t> dimensions) {
     if (dimensions.size() == 0) {
-        throw mkl::invalid_argument("DFT", "descriptor", "Cannot have 0 dimensional DFT.");
+        throw math::invalid_argument("DFT", "descriptor", "Cannot have 0 dimensional DFT.");
     }
     for (const auto& dim : dimensions) {
         if (dim <= 0) {
-            throw mkl::invalid_argument("DFT", "descriptor",
-                                        "Invalid dimension value (negative or 0).");
+            throw math::invalid_argument("DFT", "descriptor",
+                                         "Invalid dimension value (negative or 0).");
         }
     }
     compute_default_strides<dom>(dimensions, values_.fwd_strides, values_.bwd_strides);
@@ -198,7 +198,7 @@ void descriptor<prec, dom>::get_value(config_param param, ...) const {
     va_list vl;
     va_start(vl, param);
     if (va_arg(vl, void*) == nullptr) {
-        throw mkl::invalid_argument("DFT", "get_value", "config_param is nullptr.");
+        throw math::invalid_argument("DFT", "get_value", "config_param is nullptr.");
     }
     va_end(vl);
     va_start(vl, param);
@@ -256,7 +256,7 @@ void descriptor<prec, dom>::get_value(config_param param, ...) const {
             break;
         case config_param::WORKSPACE_EXTERNAL_BYTES:
             if (!pimpl_) {
-                throw mkl::invalid_argument(
+                throw math::invalid_argument(
                     "DFT", "get_value",
                     "Cannot query WORKSPACE_EXTERNAL_BYTES on uncommitted descriptor.");
             }
@@ -271,7 +271,7 @@ void descriptor<prec, dom>::get_value(config_param param, ...) const {
             *va_arg(vl, config_value*) =
                 pimpl_ ? config_value::COMMITTED : config_value::UNCOMMITTED;
             break;
-        default: throw mkl::invalid_argument("DFT", "get_value", "Invalid config_param argument.");
+        default: throw math::invalid_argument("DFT", "get_value", "Invalid config_param argument.");
     }
     va_end(vl);
 }
@@ -282,8 +282,8 @@ void descriptor<prec, dom>::set_workspace(scalar_type* usm_workspace) {
         return pimpl_->set_workspace(usm_workspace);
     }
     else {
-        throw mkl::uninitialized("DFT", "set_workspace",
-                                 "Can only set workspace on committed descriptor.");
+        throw math::uninitialized("DFT", "set_workspace",
+                                  "Can only set workspace on committed descriptor.");
     }
 }
 
@@ -293,8 +293,8 @@ void descriptor<prec, dom>::set_workspace(sycl::buffer<scalar_type>& buffer_work
         return pimpl_->set_workspace(buffer_workspace);
     }
     else {
-        throw mkl::uninitialized("DFT", "set_workspace",
-                                 "Can only set workspace on committed descriptor.");
+        throw math::uninitialized("DFT", "set_workspace",
+                                  "Can only set workspace on committed descriptor.");
     }
 }
 
@@ -305,5 +305,5 @@ template class descriptor<precision::DOUBLE, domain::REAL>;
 
 } //namespace detail
 } //namespace dft
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/src/dft/descriptor_config_helper.hpp b/src/dft/descriptor_config_helper.hpp
index dc8c97ac2..00e3c8c79 100644
--- a/src/dft/descriptor_config_helper.hpp
+++ b/src/dft/descriptor_config_helper.hpp
@@ -17,16 +17,16 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
-#define _ONEMKL_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
+#ifndef _ONEMATH_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
+#define _ONEMATH_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
 
 #include <cstdint>
 #include <type_traits>
 
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace dft {
 namespace detail {
 
@@ -113,18 +113,18 @@ void set_value(dft_values<prec, dom>& vals,
                param_type_helper_t<real_helper_t<prec>, Param>&& set_val) {
     if constexpr (Param == config_param::LENGTHS) {
         if (set_val == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_value", "Given nullptr.");
+            throw math::invalid_argument("DFT", "set_value", "Given nullptr.");
         }
         for (std::size_t i{ 0 }; i < vals.dimensions.size(); ++i) {
             if (set_val[i] <= 0) {
-                throw mkl::invalid_argument("DFT", "set_value",
-                                            "Invalid length value (negative or 0).");
+                throw math::invalid_argument("DFT", "set_value",
+                                             "Invalid length value (negative or 0).");
             }
         }
         std::copy(set_val, set_val + vals.dimensions.size(), vals.dimensions.begin());
     }
     else if constexpr (Param == config_param::PRECISION) {
-        throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+        throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
     }
     else if constexpr (Param == config_param::FORWARD_SCALE) {
         vals.fwd_scale = set_val;
@@ -134,8 +134,8 @@ void set_value(dft_values<prec, dom>& vals,
     }
     else if constexpr (Param == config_param::NUMBER_OF_TRANSFORMS) {
         if (set_val <= 0) {
-            throw mkl::invalid_argument("DFT", "set_value",
-                                        "Number of transforms must be positive.");
+            throw math::invalid_argument("DFT", "set_value",
+                                         "Number of transforms must be positive.");
         }
         vals.number_of_transforms = set_val;
     }
@@ -144,8 +144,8 @@ void set_value(dft_values<prec, dom>& vals,
             vals.complex_storage = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value",
-                                        "Complex storage must be complex_complex or real_real.");
+            throw math::invalid_argument("DFT", "set_value",
+                                         "Complex storage must be complex_complex or real_real.");
         }
     }
     else if constexpr (Param == config_param::REAL_STORAGE) {
@@ -153,7 +153,7 @@ void set_value(dft_values<prec, dom>& vals,
             vals.real_storage = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value", "Real storage must be real_real.");
+            throw math::invalid_argument("DFT", "set_value", "Real storage must be real_real.");
         }
     }
     else if constexpr (Param == config_param::CONJUGATE_EVEN_STORAGE) {
@@ -161,8 +161,8 @@ void set_value(dft_values<prec, dom>& vals,
             vals.conj_even_storage = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value",
-                                        "Conjugate even storage must be complex_complex.");
+            throw math::invalid_argument("DFT", "set_value",
+                                         "Conjugate even storage must be complex_complex.");
         }
     }
     else if constexpr (Param == config_param::PLACEMENT) {
@@ -170,22 +170,22 @@ void set_value(dft_values<prec, dom>& vals,
             vals.placement = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value",
-                                        "Placement must be inplace or not inplace.");
+            throw math::invalid_argument("DFT", "set_value",
+                                         "Placement must be inplace or not inplace.");
         }
     }
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
     else if constexpr (Param == config_param::INPUT_STRIDES) {
         if (set_val == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_value", "Given nullptr.");
+            throw math::invalid_argument("DFT", "set_value", "Given nullptr.");
         }
         reset_strides_to_zero(vals.fwd_strides, vals.bwd_strides);
         std::copy(set_val, set_val + vals.dimensions.size() + 1, vals.input_strides.begin());
     }
     else if constexpr (Param == config_param::OUTPUT_STRIDES) {
         if (set_val == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_value", "Given nullptr.");
+            throw math::invalid_argument("DFT", "set_value", "Given nullptr.");
         }
         reset_strides_to_zero(vals.fwd_strides, vals.bwd_strides);
         std::copy(set_val, set_val + vals.dimensions.size() + 1, vals.output_strides.begin());
@@ -202,7 +202,7 @@ void set_value(dft_values<prec, dom>& vals,
             vals.workspace = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value", "Workspace must be allow or avoid.");
+            throw math::invalid_argument("DFT", "set_value", "Workspace must be allow or avoid.");
         }
     }
     else if constexpr (Param == config_param::WORKSPACE_PLACEMENT) {
@@ -211,20 +211,20 @@ void set_value(dft_values<prec, dom>& vals,
             vals.workspace_placement = set_val;
         }
         else {
-            throw mkl::invalid_argument(
+            throw math::invalid_argument(
                 "DFT", "set_value", "Workspace must be WORKSPACE_AUTOMATIC or WORKSPACE_EXTERNAL.");
         }
     }
     else if constexpr (Param == config_param::WORKSPACE_EXTERNAL_BYTES) {
-        throw mkl::invalid_argument("DFT", "set_value", "Read-only parameter.");
+        throw math::invalid_argument("DFT", "set_value", "Read-only parameter.");
     }
     else if constexpr (Param == config_param::ORDERING) {
         if (set_val == config_value::ORDERED || set_val == config_value::BACKWARD_SCRAMBLED) {
             vals.ordering = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value",
-                                        "Ordering must be ordered or backwards scrambled.");
+            throw math::invalid_argument("DFT", "set_value",
+                                         "Ordering must be ordered or backwards scrambled.");
         }
     }
     else if constexpr (Param == config_param::TRANSPOSE) {
@@ -235,19 +235,19 @@ void set_value(dft_values<prec, dom>& vals,
             vals.packed_format = set_val;
         }
         else {
-            throw mkl::invalid_argument("DFT", "set_value", "Packed format must be CCE.");
+            throw math::invalid_argument("DFT", "set_value", "Packed format must be CCE.");
         }
     }
     else if constexpr (Param == config_param::FWD_STRIDES) {
         if (set_val == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_value", "Given nullptr.");
+            throw math::invalid_argument("DFT", "set_value", "Given nullptr.");
         }
         reset_strides_to_zero(vals.input_strides, vals.output_strides);
         std::copy(set_val, set_val + vals.dimensions.size() + 1, vals.fwd_strides.begin());
     }
     else if constexpr (Param == config_param::BWD_STRIDES) {
         if (set_val == nullptr) {
-            throw mkl::invalid_argument("DFT", "set_value", "Given nullptr.");
+            throw math::invalid_argument("DFT", "set_value", "Given nullptr.");
         }
         reset_strides_to_zero(vals.input_strides, vals.output_strides);
         std::copy(set_val, set_val + vals.dimensions.size() + 1, vals.bwd_strides.begin());
@@ -256,7 +256,7 @@ void set_value(dft_values<prec, dom>& vals,
 
 } // namespace detail
 } // namespace dft
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif //_ONEMKL_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
+#endif //_ONEMATH_DETAIL_DESCRIPTOR_CONFIG_HELPER_HPP_
diff --git a/src/dft/dft_loader.cpp b/src/dft/dft_loader.cpp
index 55a280388..3984208b2 100644
--- a/src/dft/dft_loader.cpp
+++ b/src/dft/dft_loader.cpp
@@ -17,17 +17,17 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/dft/detail/dft_loader.hpp"
-#include "oneapi/mkl/dft/forward.hpp"
-#include "oneapi/mkl/dft/backward.hpp"
+#include "oneapi/math/dft/detail/dft_loader.hpp"
+#include "oneapi/math/dft/forward.hpp"
+#include "oneapi/math/dft/backward.hpp"
 
 #include "function_table_initializer.hpp"
 #include "dft/function_table.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
-static oneapi::mkl::detail::table_initializer<mkl::domain::dft, dft_function_table_t>
+static oneapi::math::detail::table_initializer<math::domain::dft, dft_function_table_t>
     function_tables;
 
 template <>
@@ -59,14 +59,14 @@ commit_impl<precision::DOUBLE, domain::REAL>* create_commit<precision::DOUBLE, d
 }
 
 template <precision prec, domain dom>
-inline oneapi::mkl::device get_device(descriptor<prec, dom>& desc, const char* func_name) {
+inline oneapi::math::device get_device(descriptor<prec, dom>& desc, const char* func_name) {
     config_value is_committed{ config_value::UNCOMMITTED };
     desc.get_value(config_param::COMMIT_STATUS, &is_committed);
     if (is_committed != config_value::COMMITTED) {
-        throw mkl::invalid_argument("DFT", func_name, "Descriptor not committed.");
+        throw math::invalid_argument("DFT", func_name, "Descriptor not committed.");
     }
     // Committed means that the commit pointer is not null.
     return get_device_id(get_commit(desc)->get_queue());
 }
 
-} // namespace oneapi::mkl::dft::detail
+} // namespace oneapi::math::dft::detail
diff --git a/src/dft/execute_helper_generic.hpp b/src/dft/execute_helper_generic.hpp
index a7939a653..4f5ae6727 100644
--- a/src/dft/execute_helper_generic.hpp
+++ b/src/dft/execute_helper_generic.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
-#define _ONEMKL_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
+#ifndef _ONEMATH_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
+#define _ONEMATH_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-namespace oneapi::mkl::dft::detail {
+namespace oneapi::math::dft::detail {
 
 /** Wrap interop API to launch interop host task.
  * 
@@ -48,6 +48,6 @@ static inline void fft_enqueue_task(HandlerT&& cgh, FnT&& f) {
     });
 }
 
-} // namespace oneapi::mkl::dft::detail
+} // namespace oneapi::math::dft::detail
 
-#endif // _ONEMKL_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
+#endif // _ONEMATH_DFT_SRC_EXECUTE_HELPER_GENERIC_HPP_
diff --git a/src/dft/function_table.hpp b/src/dft/function_table.hpp
index 9146f239e..d1c647eb9 100644
--- a/src/dft/function_table.hpp
+++ b/src/dft/function_table.hpp
@@ -29,33 +29,35 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/dft/types.hpp"
-#include "oneapi/mkl/dft/descriptor.hpp"
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/dft/types.hpp"
+#include "oneapi/math/dft/descriptor.hpp"
 
 typedef struct {
     int version;
-    oneapi::mkl::dft::detail::commit_impl<oneapi::mkl::dft::precision::SINGLE,
-                                          oneapi::mkl::dft::domain::COMPLEX>* (
+    oneapi::math::dft::detail::commit_impl<oneapi::math::dft::precision::SINGLE,
+                                           oneapi::math::dft::domain::COMPLEX>* (
         *create_commit_sycl_fz)(
-        const oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE,
-                                           oneapi::mkl::dft::domain::COMPLEX>& desc,
+        const oneapi::math::dft::descriptor<oneapi::math::dft::precision::SINGLE,
+                                            oneapi::math::dft::domain::COMPLEX>& desc,
         sycl::queue& sycl_queue);
-    oneapi::mkl::dft::detail::commit_impl<oneapi::mkl::dft::precision::DOUBLE,
-                                          oneapi::mkl::dft::domain::COMPLEX>* (
+    oneapi::math::dft::detail::commit_impl<oneapi::math::dft::precision::DOUBLE,
+                                           oneapi::math::dft::domain::COMPLEX>* (
         *create_commit_sycl_dz)(
-        const oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::DOUBLE,
-                                           oneapi::mkl::dft::domain::COMPLEX>& desc,
+        const oneapi::math::dft::descriptor<oneapi::math::dft::precision::DOUBLE,
+                                            oneapi::math::dft::domain::COMPLEX>& desc,
         sycl::queue& sycl_queue);
-    oneapi::mkl::dft::detail::commit_impl<oneapi::mkl::dft::precision::SINGLE,
-                                          oneapi::mkl::dft::domain::REAL>* (*create_commit_sycl_fr)(
-        const oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE,
-                                           oneapi::mkl::dft::domain::REAL>& desc,
+    oneapi::math::dft::detail::commit_impl<oneapi::math::dft::precision::SINGLE,
+                                           oneapi::math::dft::domain::REAL>* (
+        *create_commit_sycl_fr)(
+        const oneapi::math::dft::descriptor<oneapi::math::dft::precision::SINGLE,
+                                            oneapi::math::dft::domain::REAL>& desc,
         sycl::queue& sycl_queue);
-    oneapi::mkl::dft::detail::commit_impl<oneapi::mkl::dft::precision::DOUBLE,
-                                          oneapi::mkl::dft::domain::REAL>* (*create_commit_sycl_dr)(
-        const oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::DOUBLE,
-                                           oneapi::mkl::dft::domain::REAL>& desc,
+    oneapi::math::dft::detail::commit_impl<oneapi::math::dft::precision::DOUBLE,
+                                           oneapi::math::dft::domain::REAL>* (
+        *create_commit_sycl_dr)(
+        const oneapi::math::dft::descriptor<oneapi::math::dft::precision::DOUBLE,
+                                            oneapi::math::dft::domain::REAL>& desc,
         sycl::queue& sycl_queue);
 } dft_function_table_t;
 
diff --git a/src/include/allocator_helper.hpp b/src/include/allocator_helper.hpp
index 2678dc114..433e0468e 100644
--- a/src/include/allocator_helper.hpp
+++ b/src/include/allocator_helper.hpp
@@ -27,7 +27,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 static inline void* aligned_alloc(size_t align, size_t size) {
 #ifdef _WIN64
@@ -44,7 +44,7 @@ static inline void aligned_free(void* p) {
     ::free(p);
 #endif
 }
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // __ALLOCATOR_HELPER_HPP
diff --git a/src/include/common_onemkl_conversion.hpp b/src/include/common_onemkl_conversion.hpp
new file mode 100644
index 000000000..a84a45286
--- /dev/null
+++ b/src/include/common_onemkl_conversion.hpp
@@ -0,0 +1,152 @@
+/*******************************************************************************
+* Copyright Codeplay Software Ltd
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_SRC_INCLUDE_COMMON_ONEMKL_TYPES_CONVERSION_HPP_
+#define _ONEMATH_SRC_INCLUDE_COMMON_ONEMKL_TYPES_CONVERSION_HPP_
+
+// The file is used to convert oneMath types to Intel(R) oneMKL types for all the common types shared across domains.
+// The file assumes that the common types are identical between the 2 libraries, except for their namespace.
+
+#include <mkl/exceptions.hpp>
+#include <mkl/types.hpp>
+
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/exceptions.hpp"
+
+namespace oneapi {
+namespace math {
+namespace detail {
+
+inline auto get_onemkl_transpose(oneapi::math::transpose* param_ptr) {
+    return reinterpret_cast<oneapi::mkl::transpose*>(param_ptr); // assumes identical layout
+}
+inline auto get_onemkl_transpose(oneapi::math::transpose param) {
+    return static_cast<oneapi::mkl::transpose>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_uplo(oneapi::math::uplo* param_ptr) {
+    return reinterpret_cast<oneapi::mkl::uplo*>(param_ptr); // assumes identical layout
+}
+inline auto get_onemkl_uplo(oneapi::math::uplo param) {
+    return static_cast<oneapi::mkl::uplo>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_diag(oneapi::math::diag* param_ptr) {
+    return reinterpret_cast<oneapi::mkl::diag*>(param_ptr); // assumes identical layout
+}
+inline auto get_onemkl_diag(oneapi::math::diag param) {
+    return static_cast<oneapi::mkl::diag>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_side(oneapi::math::side* param_ptr) {
+    return reinterpret_cast<oneapi::mkl::side*>(param_ptr); // assumes identical layout
+}
+inline auto get_onemkl_side(oneapi::math::side param) {
+    return static_cast<oneapi::mkl::side>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_offset(oneapi::math::offset param) {
+    return static_cast<oneapi::mkl::offset>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_layout(oneapi::math::layout param) {
+    return static_cast<oneapi::mkl::layout>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_index_base(oneapi::math::index_base param) {
+    return static_cast<oneapi::mkl::index_base>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_job(oneapi::math::job param) {
+    return static_cast<oneapi::mkl::job>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_jobsvd(oneapi::math::jobsvd param) {
+    return static_cast<oneapi::mkl::jobsvd>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_generate(oneapi::math::generate param) {
+    return static_cast<oneapi::mkl::generate>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_compz(oneapi::math::compz param) {
+    return static_cast<oneapi::mkl::compz>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_direct(oneapi::math::direct param) {
+    return static_cast<oneapi::mkl::direct>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_storev(oneapi::math::storev param) {
+    return static_cast<oneapi::mkl::storev>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_rangev(oneapi::math::rangev param) {
+    return static_cast<oneapi::mkl::rangev>(param); // value cast, no type punning
+}
+
+inline auto get_onemkl_order(oneapi::math::order param) {
+    return static_cast<oneapi::mkl::order>(param); // value cast, no type punning
+}
+
+// Rethrow Intel(R) oneMKL exceptions as oneMath exceptions
+#define RETHROW_ONEMKL_EXCEPTIONS(EXPRESSION)              \
+    do {                                                   \
+        try {                                              \
+            EXPRESSION;                                    \
+        }                                                  \
+        catch (const oneapi::mkl::unsupported_device& e) { \
+            throw unsupported_device(e.what());            \
+        }                                                  \
+        catch (const oneapi::mkl::host_bad_alloc& e) {     \
+            throw host_bad_alloc(e.what());                \
+        }                                                  \
+        catch (const oneapi::mkl::device_bad_alloc& e) {   \
+            throw device_bad_alloc(e.what());              \
+        }                                                  \
+        catch (const oneapi::mkl::unimplemented& e) {      \
+            throw unimplemented(e.what());                 \
+        }                                                  \
+        catch (const oneapi::mkl::invalid_argument& e) {   \
+            throw invalid_argument(e.what());              \
+        }                                                  \
+        catch (const oneapi::mkl::uninitialized& e) {      \
+            throw uninitialized(e.what());                 \
+        }                                                  \
+        catch (const oneapi::mkl::computation_error& e) {  \
+            throw computation_error(e.what());             \
+        }                                                  \
+        catch (const oneapi::mkl::batch_error& e) {        \
+            throw batch_error(e.what());                   \
+        }                                                  \
+        catch (const oneapi::mkl::exception& e) {          \
+            throw exception(e.what());                     \
+        }                                                  \
+    } while (0)
+
+#define RETHROW_ONEMKL_EXCEPTIONS_RET(EXPRESSION)     \
+    do {                                              \
+        RETHROW_ONEMKL_EXCEPTIONS(return EXPRESSION); \
+    } while (0)
+
+} // namespace detail
+} // namespace math
+} // namespace oneapi
+
+#endif // _ONEMATH_SRC_INCLUDE_COMMON_ONEMKL_TYPES_CONVERSION_HPP_
diff --git a/src/include/dtype_string.hpp b/src/include/dtype_string.hpp
index 6f2a87feb..528b7cee7 100644
--- a/src/include/dtype_string.hpp
+++ b/src/include/dtype_string.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_ERROR_HELPER_HPP_
-#define _ONEMKL_ERROR_HELPER_HPP_
+#ifndef _ONEMATH_ERROR_HELPER_HPP_
+#define _ONEMATH_ERROR_HELPER_HPP_
 
 #include <string>
 
@@ -53,4 +53,4 @@ inline const std::string dtype_string<std::int8_t>() {
     return "int8";
 }
 
-#endif //_ONEMKL_ERROR_HELPER_HPP_
+#endif //_ONEMATH_ERROR_HELPER_HPP_
diff --git a/src/include/exceptions_helper.hpp b/src/include/exceptions_helper.hpp
index 9db0f0ddd..cc1c46ba5 100644
--- a/src/include/exceptions_helper.hpp
+++ b/src/include/exceptions_helper.hpp
@@ -23,14 +23,14 @@
 #include <stdexcept>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 class backend_unsupported_exception : public std::runtime_error {
 public:
     backend_unsupported_exception() : std::runtime_error("Not yet supported for this backend") {}
 };
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // __EXCEPTIONS_HELPER_HPP
diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp
index 0c0e040c0..3e1f0824b 100644
--- a/src/include/function_table_initializer.hpp
+++ b/src/include/function_table_initializer.hpp
@@ -23,8 +23,8 @@
 #include <cstdint>
 #include <map>
 
-#include "oneapi/mkl/detail/backends_table.hpp"
-#include "oneapi/mkl/detail/exceptions.hpp"
+#include "oneapi/math/detail/backends_table.hpp"
+#include "oneapi/math/detail/exceptions.hpp"
 
 #define SPEC_VERSION 1
 
@@ -45,10 +45,10 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace detail {
 
-template <oneapi::mkl::domain domain_id, typename function_table_t>
+template <oneapi::math::domain domain_id, typename function_table_t>
 class table_initializer {
     struct handle_deleter {
         using pointer = LIB_TYPE;
@@ -59,7 +59,7 @@ class table_initializer {
     using dlhandle = std::unique_ptr<LIB_TYPE, handle_deleter>;
 
 public:
-    function_table_t& operator[](std::pair<oneapi::mkl::device, sycl::queue> device_queue_pair) {
+    function_table_t& operator[](std::pair<oneapi::math::device, sycl::queue> device_queue_pair) {
         auto lib = tables.find(device_queue_pair.first);
         if (lib != tables.end())
             return lib->second;
@@ -96,7 +96,7 @@ class table_initializer {
     }
 #endif
 
-    function_table_t& add_table(oneapi::mkl::device key, sycl::queue& q) {
+    function_table_t& add_table(oneapi::math::device key, sycl::queue& q) {
         dlhandle handle;
         // check all available libraries for the key(device)
         for (const char* libname : libraries[domain_id][key]) {
@@ -105,12 +105,12 @@ class table_initializer {
                 break;
         }
         if (!handle) {
-            if (!is_generic_device_supported && key == oneapi::mkl::device::generic_device) {
-                throw mkl::unsupported_device("", "", q.get_device());
+            if (!is_generic_device_supported && key == oneapi::math::device::generic_device) {
+                throw math::unsupported_device("", "", q.get_device());
             }
             else {
                 std::cerr << ERROR_MSG << '\n';
-                throw mkl::backend_not_found();
+                throw math::backend_not_found();
             }
         }
         auto t =
@@ -118,22 +118,22 @@ class table_initializer {
 
         if (!t) {
             std::cerr << ERROR_MSG << '\n';
-            throw mkl::function_not_found();
+            throw math::function_not_found();
         }
         if (t->version != SPEC_VERSION)
-            throw mkl::specification_mismatch();
+            throw math::specification_mismatch();
 
         handles[key] = std::move(handle);
         tables[key] = *t;
         return *t;
     }
 
-    std::map<oneapi::mkl::device, function_table_t> tables;
-    std::map<oneapi::mkl::device, dlhandle> handles;
+    std::map<oneapi::math::device, function_table_t> tables;
+    std::map<oneapi::math::device, dlhandle> handles;
 };
 
 } //namespace detail
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif //_LOADER_HPP_
diff --git a/src/include/runtime_support_helper.hpp b/src/include/runtime_support_helper.hpp
index 7c3514673..cabc4ea8d 100644
--- a/src/include/runtime_support_helper.hpp
+++ b/src/include/runtime_support_helper.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_RUNTIME_SUPPORT_HELPER_HPP_
-#define _ONEMKL_RUNTIME_SUPPORT_HELPER_HPP_
+#ifndef _ONEMATH_RUNTIME_SUPPORT_HELPER_HPP_
+#define _ONEMATH_RUNTIME_SUPPORT_HELPER_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -50,4 +50,4 @@ bool verify_support(sycl::queue q, sycl::aspect aspect) {
     }
 }
 
-#endif //_ONEMKL_RUNTIME_SUPPORT_HELPER_HPP_
+#endif //_ONEMATH_RUNTIME_SUPPORT_HELPER_HPP_
diff --git a/src/lapack/CMakeLists.txt b/src/lapack/CMakeLists.txt
index 524edde03..17ee4cca1 100644
--- a/src/lapack/CMakeLists.txt
+++ b/src/lapack/CMakeLists.txt
@@ -22,22 +22,23 @@ add_subdirectory(backends)
 
 # Recipe for LAPACK loader object
 if(BUILD_SHARED_LIBS)
-add_library(onemkl_lapack OBJECT)
-target_sources(onemkl_lapack PRIVATE lapack_loader.cpp)
-target_include_directories(onemkl_lapack
+add_library(onemath_lapack OBJECT)
+add_deprecated_library(onemath_lapack)
+target_sources(onemath_lapack PRIVATE lapack_loader.cpp)
+target_include_directories(onemath_lapack
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
-          $<TARGET_FILE_DIR:onemkl>
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
+          $<TARGET_FILE_DIR:onemath>
 )
 
-target_compile_options(onemkl_lapack PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(onemath_lapack PRIVATE ${ONEMATH_BUILD_COPT})
 
-set_target_properties(onemkl_lapack PROPERTIES
+set_target_properties(onemath_lapack PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
-target_link_libraries(onemkl_lapack PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(onemath_lapack PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
diff --git a/src/lapack/backends/CMakeLists.txt b/src/lapack/backends/CMakeLists.txt
index 636f6728f..44ac43b38 100644
--- a/src/lapack/backends/CMakeLists.txt
+++ b/src/lapack/backends/CMakeLists.txt
@@ -17,8 +17,8 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-add_custom_target(onemkl_backend_libs_lapack)
-add_dependencies(onemkl_backend_libs onemkl_backend_libs_lapack)
+add_custom_target(onemath_backend_libs_lapack)
+add_dependencies(onemath_backend_libs onemath_backend_libs_lapack)
 
 if(ENABLE_MKLCPU_BACKEND)
   add_subdirectory(mklcpu)
diff --git a/src/lapack/backends/cusolver/CMakeLists.txt b/src/lapack/backends/cusolver/CMakeLists.txt
index dfd1267d7..b49e9e1ff 100644
--- a/src/lapack/backends/cusolver/CMakeLists.txt
+++ b/src/lapack/backends/cusolver/CMakeLists.txt
@@ -17,17 +17,18 @@
 #
 #=========================================================================
 
-set(LIB_NAME onemkl_lapack_cusolver)
+set(LIB_NAME onemath_lapack_cusolver)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(cuSOLVER REQUIRED)
 find_package(cuBLAS REQUIRED)
 set(SOURCES     cusolver_lapack.cpp
                 cusolver_batch.cpp
-                $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:cusolver_scope_handle.cpp >
+                $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},dpc++>:cusolver_scope_handle.cpp >
                 $<$<BOOL:${BUILD_SHARED_LIBS}>: cusolver_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_lapack ${LIB_NAME})
+add_dependencies(onemath_backend_libs_lapack ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
@@ -35,13 +36,13 @@ target_include_directories(${LIB_OBJ}
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/blas/backends/cublas
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 target_link_libraries(${LIB_OBJ} 
-  PUBLIC ONEMKL::SYCL::SYCL 
-         ONEMKL::cuSOLVER::cuSOLVER
-         ONEMKL::cuBLAS::cuBLAS)
+  PUBLIC ONEMATH::SYCL::SYCL 
+         ONEMATH::cuSOLVER::cuSOLVER
+         ONEMATH::cuBLAS::cuBLAS)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_11)
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON)
@@ -59,8 +60,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/lapack/backends/cusolver/cusolver_batch.cpp b/src/lapack/backends/cusolver/cusolver_batch.cpp
index f5e2a6e5f..3ad137912 100644
--- a/src/lapack/backends/cusolver/cusolver_batch.cpp
+++ b/src/lapack/backends/cusolver/cusolver_batch.cpp
@@ -20,11 +20,11 @@
 #include "cusolver_helper.hpp"
 #include "cusolver_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -44,7 +44,7 @@ inline void geqrf_batch(const char* func_name, Func func, sycl::queue& queue, st
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -110,7 +110,7 @@ inline void getri_batch(const char* func_name, Func func, sycl::queue& queue, st
         sycl::accessor ipiv32_acc{ ipiv32, cgh };
         sycl::accessor devInfo_acc{ devInfo, cgh, sycl::write_only };
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             cublasStatus_t err;
             CUresult cuda_result;
             cublasHandle_t cublas_handle;
@@ -186,7 +186,7 @@ GETRI_STRIDED_BATCH_LAUNCHER(std::complex<double>, cublasZgetriBatched)
 
 template <typename Func, typename T>
 inline void getrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                        oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<T>& a, std::int64_t lda, std::int64_t stride_a,
                         sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                         sycl::buffer<T>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -215,7 +215,7 @@ inline void getrs_batch(const char* func_name, Func func, sycl::queue& queue,
         auto ipiv_acc = ipiv32.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::write>(cgh);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto ipiv_ = sc.get_mem<std::int32_t*>(ipiv_acc);
@@ -236,7 +236,7 @@ inline void getrs_batch(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GETRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                      \
-    void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,            \
+    void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,           \
                      std::int64_t nrhs, sycl::buffer<TYPE>& a, std::int64_t lda,                  \
                      std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,                     \
                      std::int64_t stride_ipiv, sycl::buffer<TYPE>& b, std::int64_t ldb,           \
@@ -276,7 +276,7 @@ inline void getrf_batch(const char* func_name, Func func, sycl::queue& queue, st
         auto ipiv32_acc = ipiv32.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto ipiv_ = sc.get_mem<int*>(ipiv32_acc);
@@ -334,7 +334,7 @@ inline void orgqr_batch(const char* func_name, Func func, sycl::queue& queue, st
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -367,7 +367,7 @@ ORGQR_STRIDED_BATCH_LAUNCHER(double, cusolverDnDorgqr)
 
 template <typename Func, typename T>
 inline void potrf_batch(const char* func_name, Func func, sycl::queue& queue,
-                        oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T>& a,
+                        oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<T>& a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                         sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -376,7 +376,7 @@ inline void potrf_batch(const char* func_name, Func func, sycl::queue& queue,
 
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             CUdeviceptr a_dev;
             CUresult cuda_result;
@@ -402,7 +402,7 @@ inline void potrf_batch(const char* func_name, Func func, sycl::queue& queue,
 
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                      \
-    void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,                  \
+    void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,                 \
                      sycl::buffer<TYPE>& a, std::int64_t lda, std::int64_t stride_a,              \
                      std::int64_t batch_size, sycl::buffer<TYPE>& scratchpad,                     \
                      std::int64_t scratchpad_size) {                                              \
@@ -419,7 +419,7 @@ POTRF_STRIDED_BATCH_LAUNCHER(std::complex<double>, cusolverDnZpotrfBatched)
 
 template <typename Func, typename T>
 inline void potrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                        oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+                        oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<T>& a, std::int64_t lda, std::int64_t stride_a,
                         sycl::buffer<T>& b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, sycl::buffer<T>& scratchpad,
@@ -435,7 +435,7 @@ inline void potrs_batch(const char* func_name, Func func, sycl::queue& queue,
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             CUdeviceptr a_dev, b_dev;
             cusolverStatus_t err;
@@ -469,7 +469,7 @@ inline void potrs_batch(const char* func_name, Func func, sycl::queue& queue,
 
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                     \
-    void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,                 \
+    void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,                \
                      std::int64_t nrhs, sycl::buffer<TYPE>& a, std::int64_t lda,                 \
                      std::int64_t stride_a, sycl::buffer<TYPE>& b, std::int64_t ldb,             \
                      std::int64_t stride_b, std::int64_t batch_size,                             \
@@ -500,7 +500,7 @@ inline void ungqr_batch(const char* func_name, Func func, sycl::queue& queue, st
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -545,7 +545,7 @@ inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -596,7 +596,7 @@ inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType**>(a);
             auto tau_ = reinterpret_cast<cuDataType**>(tau);
@@ -654,7 +654,7 @@ inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto devInfo_ = reinterpret_cast<int*>(devInfo);
@@ -736,7 +736,7 @@ inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType**>(a);
             auto scratch_ = reinterpret_cast<cuDataType*>(scratchpad);
@@ -836,7 +836,7 @@ sycl::event getri_batch(const char* func_name, Func func, sycl::queue& queue, st
         cgh.depends_on(done_casting);
         cgh.depends_on(dependencies);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             cublasStatus_t err;
             CUresult cuda_result;
             cublasHandle_t cublas_handle;
@@ -938,7 +938,7 @@ sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double
 
 template <typename Func, typename T>
 inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                               oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                                T* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                                std::int64_t stride_ipiv, T* b, std::int64_t ldb,
                                std::int64_t stride_b, std::int64_t batch_size, T* scratchpad,
@@ -963,7 +963,7 @@ inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& qu
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
         cgh.depends_on(done_casting);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto ipiv_ = reinterpret_cast<int*>(ipiv32);
@@ -988,7 +988,7 @@ inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& qu
 }
 
 #define GETRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                  \
-    sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,     \
+    sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,    \
                             std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t stride_a,  \
                             std::int64_t* ipiv, std::int64_t stride_ipiv, TYPE* b,                \
                             std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,     \
@@ -1008,7 +1008,7 @@ GETRS_STRIDED_BATCH_LAUNCHER_USM(std::complex<double>, cusolverDnZgetrs)
 
 template <typename Func, typename T>
 inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+                               oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
                                T** a, std::int64_t* lda, std::int64_t** ipiv, T** b,
                                std::int64_t* ldb, std::int64_t group_count,
                                std::int64_t* group_sizes, T* scratchpad,
@@ -1051,7 +1051,7 @@ inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& qu
         cgh.depends_on(dependencies);
         cgh.depends_on(casting_dependencies);
 
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType**>(a);
             auto b_ = reinterpret_cast<cuDataType**>(b);
@@ -1081,15 +1081,15 @@ inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& qu
     return done;
 }
 
-#define GETRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                        \
-    sycl::event getrs_batch(                                                                    \
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, \
-        TYPE** a, std::int64_t* lda, std::int64_t** ipiv, TYPE** b, std::int64_t* ldb,          \
-        std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad,                  \
-        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {           \
-        return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda,  \
-                           ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, \
-                           dependencies);                                                       \
+#define GETRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                         \
+    sycl::event getrs_batch(                                                                     \
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, \
+        TYPE** a, std::int64_t* lda, std::int64_t** ipiv, TYPE** b, std::int64_t* ldb,           \
+        std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad,                   \
+        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {            \
+        return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda,   \
+                           ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size,  \
+                           dependencies);                                                        \
     }
 
 GETRS_BATCH_LAUNCHER_USM(float, cusolverDnSgetrs)
@@ -1111,7 +1111,7 @@ inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1161,7 +1161,7 @@ inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType**>(a);
             auto tau_ = reinterpret_cast<cuDataType**>(tau);
@@ -1202,7 +1202,7 @@ ORGQR_BATCH_LAUNCHER_USM(double, cusolverDnDorgqr)
 
 template <typename Func, typename T>
 inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                               oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                                std::int64_t stride_a, std::int64_t batch_size, T* scratchpad,
                                std::int64_t scratchpad_size,
                                const std::vector<sycl::event>& dependencies) {
@@ -1212,7 +1212,7 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             CUdeviceptr a_dev;
             cusolverStatus_t err;
@@ -1239,7 +1239,7 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
 
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                  \
-    sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,  \
+    sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \
                             std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,     \
                             TYPE* scratchpad, std::int64_t scratchpad_size,                       \
                             const std::vector<sycl::event>& dependencies) {                       \
@@ -1256,7 +1256,7 @@ POTRF_STRIDED_BATCH_LAUNCHER_USM(std::complex<double>, cusolverDnZpotrfBatched)
 
 template <typename Func, typename T>
 inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda,
+                               oneapi::math::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda,
                                std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad,
                                std::int64_t scratchpad_size,
                                const std::vector<sycl::event>& dependencies) {
@@ -1270,7 +1270,7 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             CUdeviceptr a_dev;
@@ -1300,13 +1300,13 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
 }
 
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
-#define POTRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                           \
-    sycl::event potrf_batch(                                                                       \
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, TYPE** a, std::int64_t* lda, \
-        std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad,                     \
-        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {              \
-        return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda,            \
-                           group_count, group_sizes, scratchpad, scratchpad_size, dependencies);   \
+#define POTRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                          \
+    sycl::event potrf_batch(                                                                      \
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, TYPE** a,                  \
+        std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \
+        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {             \
+        return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda,           \
+                           group_count, group_sizes, scratchpad, scratchpad_size, dependencies);  \
     }
 
 POTRF_BATCH_LAUNCHER_USM(float, cusolverDnSpotrfBatched)
@@ -1318,7 +1318,7 @@ POTRF_BATCH_LAUNCHER_USM(std::complex<double>, cusolverDnZpotrfBatched)
 
 template <typename Func, typename T>
 inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
+                               oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
                                std::int64_t lda, std::int64_t stride_a, T* b, std::int64_t ldb,
                                std::int64_t stride_b, std::int64_t batch_size, T* scratchpad,
                                std::int64_t scratchpad_size,
@@ -1333,7 +1333,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             CUresult cuda_result;
             CUdeviceptr a_dev, b_dev;
@@ -1367,7 +1367,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                   \
     sycl::event potrs_batch(                                                                       \
-        sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE* a,    \
+        sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE* a,   \
         std::int64_t lda, std::int64_t stride_a, TYPE* b, std::int64_t ldb, std::int64_t stride_b, \
         std::int64_t batch_size, TYPE* scratchpad, std::int64_t scratchpad_size,                   \
         const std::vector<sycl::event>& dependencies) {                                            \
@@ -1385,7 +1385,7 @@ POTRS_STRIDED_BATCH_LAUNCHER_USM(std::complex<double>, cusolverDnZpotrsBatched)
 
 template <typename Func, typename T>
 inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a,
+                               oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a,
                                std::int64_t* lda, T** b, std::int64_t* ldb,
                                std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad,
                                std::int64_t scratchpad_size,
@@ -1416,7 +1416,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
         cgh.depends_on(dependencies);
         cgh.depends_on(done_cpy_a);
         cgh.depends_on(done_cpy_b);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             cusolverStatus_t err;
@@ -1442,7 +1442,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                         \
     sycl::event potrs_batch(                                                                     \
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,        \
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,       \
         TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count,      \
         std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size,               \
         const std::vector<sycl::event>& dependencies) {                                          \
@@ -1470,7 +1470,7 @@ inline sycl::event ungqr_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1520,7 +1520,7 @@ inline sycl::event ungqr_batch(const char* func_name, Func func, sycl::queue& qu
 
     auto done = queue.submit([&](sycl::handler& cgh) {
         cgh.depends_on(dependencies);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType**>(a);
             auto tau_ = reinterpret_cast<cuDataType**>(tau);
@@ -1567,7 +1567,7 @@ inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t stride_a, std::int64_t stride_ipiv,
                                         std::int64_t batch_size, int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             cusolverStatus_t err;
 
@@ -1613,13 +1613,13 @@ GETRI_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex<double>)
 #undef GETRI_STRIDED_BATCH_LAUNCHER_SCRATCH
 
 // cusolverDnXgetrs does not use scratchpad memory
-#define GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE)                                            \
-    template <>                                                                               \
-    std::int64_t getrs_batch_scratchpad_size<TYPE>(                                           \
-        sycl::queue & queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, \
-        std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,  \
-        std::int64_t stride_b, std::int64_t batch_size) {                                     \
-        return 0;                                                                             \
+#define GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE)                                             \
+    template <>                                                                                \
+    std::int64_t getrs_batch_scratchpad_size<TYPE>(                                            \
+        sycl::queue & queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, \
+        std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,   \
+        std::int64_t stride_b, std::int64_t batch_size) {                                      \
+        return 0;                                                                              \
     }
 
 GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(float)
@@ -1635,7 +1635,7 @@ inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t stride_a, std::int64_t stride_tau,
                                         std::int64_t batch_size, int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             cusolverStatus_t err;
 
@@ -1664,12 +1664,12 @@ GEQRF_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZgeqrf_buff
 #undef GEQRF_STRIDED_BATCH_LAUNCHER_SCRATCH
 
 // cusolverDnXpotrfBatched does not use scratchpad memory
-#define POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE)                                     \
-    template <>                                                                        \
-    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                    \
-        sycl::queue & queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, \
-        std::int64_t stride_a, std::int64_t batch_size) {                              \
-        return 0;                                                                      \
+#define POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE)                                      \
+    template <>                                                                         \
+    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                     \
+        sycl::queue & queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, \
+        std::int64_t stride_a, std::int64_t batch_size) {                               \
+        return 0;                                                                       \
     }
 
 POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(float)
@@ -1683,7 +1683,7 @@ POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex<double>)
 #define POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE)                                        \
     template <>                                                                           \
     std::int64_t potrs_batch_scratchpad_size<TYPE>(                                       \
-        sycl::queue & queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,   \
+        sycl::queue & queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,  \
         std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, \
         std::int64_t batch_size) {                                                        \
         return 0;                                                                         \
@@ -1703,7 +1703,7 @@ inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t stride_tau, std::int64_t batch_size,
                                         int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             cusolverStatus_t err;
 
@@ -1737,7 +1737,7 @@ inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t stride_tau, std::int64_t batch_size,
                                         int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             cusolverStatus_t err;
 
@@ -1770,7 +1770,7 @@ inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t group_count, std::int64_t* group_sizes,
                                         int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int group_scratch_size = 0;
             *scratch_size = 0;
@@ -1827,13 +1827,13 @@ GETRI_GROUP_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef GETRI_GROUP_LAUNCHER_SCRATCH
 
-#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                        \
-    template <>                                                                                   \
-    std::int64_t getrs_batch_scratchpad_size<TYPE>(                                               \
-        sycl::queue & queue, oneapi::mkl::transpose * trans, std::int64_t* n, std::int64_t* nrhs, \
-        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                           \
-        std::int64_t* group_sizes) {                                                              \
-        return 0;                                                                                 \
+#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                         \
+    template <>                                                                                    \
+    std::int64_t getrs_batch_scratchpad_size<TYPE>(                                                \
+        sycl::queue & queue, oneapi::math::transpose * trans, std::int64_t* n, std::int64_t* nrhs, \
+        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                            \
+        std::int64_t* group_sizes) {                                                               \
+        return 0;                                                                                  \
     }
 
 GETRS_GROUP_LAUNCHER_SCRATCH(float)
@@ -1849,7 +1849,7 @@ inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t group_count, std::int64_t* group_sizes,
                                         int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int group_scratch_size = 0;
             *scratch_size = 0;
@@ -1891,7 +1891,7 @@ inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t* lda, std::int64_t group_count,
                                         std::int64_t* group_sizes, int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int group_scratch_size = 0;
             *scratch_size = 0;
@@ -1927,12 +1927,12 @@ ORGQR_GROUP_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize)
 #undef ORGQR_GROUP_LAUNCHER_SCRATCH
 
 // cusolverDnXpotrfBatched does not use scratchpad memory
-#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE)                                                 \
-    template <>                                                                            \
-    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                        \
-        sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* lda, \
-        std::int64_t group_count, std::int64_t* group_sizes) {                             \
-        return 0;                                                                          \
+#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE)                                                  \
+    template <>                                                                             \
+    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                         \
+        sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* lda, \
+        std::int64_t group_count, std::int64_t* group_sizes) {                              \
+        return 0;                                                                           \
     }
 
 POTRF_GROUP_LAUNCHER_SCRATCH(float)
@@ -1943,13 +1943,13 @@ POTRF_GROUP_LAUNCHER_SCRATCH(std::complex<double>)
 #undef POTRF_GROUP_LAUNCHER_SCRATCH
 
 // cusolverDnXpotrsBatched does not use scratchpad memory
-#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                  \
-    template <>                                                                             \
-    std::int64_t potrs_batch_scratchpad_size<TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \
-        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                     \
-        std::int64_t* group_sizes) {                                                        \
-        return 0;                                                                           \
+#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                   \
+    template <>                                                                              \
+    std::int64_t potrs_batch_scratchpad_size<TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \
+        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                      \
+        std::int64_t* group_sizes) {                                                         \
+        return 0;                                                                            \
     }
 
 POTRS_GROUP_LAUNCHER_SCRATCH(float)
@@ -1965,7 +1965,7 @@ inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::
                                         std::int64_t* lda, std::int64_t group_count,
                                         std::int64_t* group_sizes, int* scratch_size) {
     auto e = queue.submit([&](sycl::handler& cgh) {
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int group_scratch_size = 0;
             *scratch_size = 0;
@@ -2002,5 +2002,5 @@ UNGQR_GROUP_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZungqr_bufferSize)
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/cusolver/cusolver_handle.hpp b/src/lapack/backends/cusolver/cusolver_handle.hpp
index 75d589b06..53be426f5 100644
--- a/src/lapack/backends/cusolver/cusolver_handle.hpp
+++ b/src/lapack/backends/cusolver/cusolver_handle.hpp
@@ -22,7 +22,7 @@
 #include <unordered_map>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -55,7 +55,7 @@ struct cusolver_handle {
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // CUSOLVER_HANDLE_HPP
diff --git a/src/lapack/backends/cusolver/cusolver_helper.hpp b/src/lapack/backends/cusolver/cusolver_helper.hpp
index 425993d45..52da00bb6 100644
--- a/src/lapack/backends/cusolver/cusolver_helper.hpp
+++ b/src/lapack/backends/cusolver/cusolver_helper.hpp
@@ -33,18 +33,18 @@
 #include <cuda.h>
 #include <complex>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "runtime_support_helper.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/exceptions.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
 // The static assert to make sure that all index types used in
-// oneMKL/include/oneapi/mkl/lapack.hpp interface are int64_t
+// oneMath/include/oneapi/math/lapack.hpp interface are int64_t
 template <typename... Next>
 struct is_int64 : std::false_type {};
 
@@ -76,7 +76,7 @@ struct Overflow<Index, T...> {
 
 template <typename Index, typename... Next>
 void overflow_check(Index index, Next... indices) {
-    static_assert(is_int64<Index, Next...>::value, "oneMKL index type must be 64 bit integer.");
+    static_assert(is_int64<Index, Next...>::value, "oneMath index type must be 64 bit integer.");
     Overflow<Index, Next...>::check(index, indices...);
 }
 
@@ -219,52 +219,52 @@ inline cusolverEigType_t get_cusolver_itype(std::int64_t itype) {
     }
 }
 
-inline cusolverEigMode_t get_cusolver_job(oneapi::mkl::job jobz) {
+inline cusolverEigMode_t get_cusolver_job(oneapi::math::job jobz) {
     switch (jobz) {
-        case oneapi::mkl::job::N: return CUSOLVER_EIG_MODE_NOVECTOR;
-        case oneapi::mkl::job::V: return CUSOLVER_EIG_MODE_VECTOR;
+        case oneapi::math::job::N: return CUSOLVER_EIG_MODE_NOVECTOR;
+        case oneapi::math::job::V: return CUSOLVER_EIG_MODE_VECTOR;
         default: throw "Wrong jobz.";
     }
 }
 
-inline signed char get_cusolver_jobsvd(oneapi::mkl::jobsvd job) {
+inline signed char get_cusolver_jobsvd(oneapi::math::jobsvd job) {
     switch (job) {
-        case oneapi::mkl::jobsvd::N: return 'N';
-        case oneapi::mkl::jobsvd::A: return 'A';
-        case oneapi::mkl::jobsvd::O: return 'O';
-        case oneapi::mkl::jobsvd::S: return 'S';
+        case oneapi::math::jobsvd::N: return 'N';
+        case oneapi::math::jobsvd::A: return 'A';
+        case oneapi::math::jobsvd::O: return 'O';
+        case oneapi::math::jobsvd::S: return 'S';
     }
 }
 
-inline cublasOperation_t get_cublas_operation(oneapi::mkl::transpose trn) {
+inline cublasOperation_t get_cublas_operation(oneapi::math::transpose trn) {
     switch (trn) {
-        case oneapi::mkl::transpose::nontrans: return CUBLAS_OP_N;
-        case oneapi::mkl::transpose::trans: return CUBLAS_OP_T;
-        case oneapi::mkl::transpose::conjtrans: return CUBLAS_OP_C;
+        case oneapi::math::transpose::nontrans: return CUBLAS_OP_N;
+        case oneapi::math::transpose::trans: return CUBLAS_OP_T;
+        case oneapi::math::transpose::conjtrans: return CUBLAS_OP_C;
         default: throw "Wrong transpose Operation.";
     }
 }
 
-inline cublasFillMode_t get_cublas_fill_mode(oneapi::mkl::uplo ul) {
+inline cublasFillMode_t get_cublas_fill_mode(oneapi::math::uplo ul) {
     switch (ul) {
-        case oneapi::mkl::uplo::upper: return CUBLAS_FILL_MODE_UPPER;
-        case oneapi::mkl::uplo::lower: return CUBLAS_FILL_MODE_LOWER;
+        case oneapi::math::uplo::upper: return CUBLAS_FILL_MODE_UPPER;
+        case oneapi::math::uplo::lower: return CUBLAS_FILL_MODE_LOWER;
         default: throw "Wrong fill mode.";
     }
 }
 
-inline cublasSideMode_t get_cublas_side_mode(oneapi::mkl::side lr) {
+inline cublasSideMode_t get_cublas_side_mode(oneapi::math::side lr) {
     switch (lr) {
-        case oneapi::mkl::side::left: return CUBLAS_SIDE_LEFT;
-        case oneapi::mkl::side::right: return CUBLAS_SIDE_RIGHT;
+        case oneapi::math::side::left: return CUBLAS_SIDE_LEFT;
+        case oneapi::math::side::right: return CUBLAS_SIDE_RIGHT;
         default: throw "Wrong side mode.";
     }
 }
 
-inline cublasSideMode_t get_cublas_generate(oneapi::mkl::generate qp) {
+inline cublasSideMode_t get_cublas_generate(oneapi::math::generate qp) {
     switch (qp) {
-        case oneapi::mkl::generate::Q: return CUBLAS_SIDE_LEFT;
-        case oneapi::mkl::generate::P: return CUBLAS_SIDE_RIGHT;
+        case oneapi::math::generate::Q: return CUBLAS_SIDE_LEFT;
+        case oneapi::math::generate::P: return CUBLAS_SIDE_RIGHT;
         default: throw "Wrong generate.";
     }
 }
@@ -310,7 +310,7 @@ inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char*
     get_cusolver_devinfo(queue, devinfo, dev_info_);
     for (const auto& val : dev_info_) {
         if (val > 0)
-            throw oneapi::mkl::lapack::computation_error(
+            throw oneapi::math::lapack::computation_error(
                 func_name, std::string(cufunc_name) + " failed with info = " + std::to_string(val),
                 val);
     }
@@ -331,6 +331,6 @@ T** create_ptr_list_from_stride(T* ptr, int64_t ptr_stride, int64_t batch_size)
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif // _CUSOLVER_HELPER_HPP_
diff --git a/src/lapack/backends/cusolver/cusolver_lapack.cpp b/src/lapack/backends/cusolver/cusolver_lapack.cpp
index 2e3176156..6a5427712 100644
--- a/src/lapack/backends/cusolver/cusolver_lapack.cpp
+++ b/src/lapack/backends/cusolver/cusolver_lapack.cpp
@@ -19,11 +19,11 @@
 #include "cusolver_helper.hpp"
 #include "cusolver_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -48,7 +48,7 @@ inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto tauq_acc = tauq.template get_access<sycl::access::mode::write>(cgh);
         auto taup_acc = taup.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto d_ = sc.get_mem<cuDataType_B*>(d_acc);
@@ -111,7 +111,7 @@ inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -157,7 +157,7 @@ void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m,
         auto ipiv32_acc = ipiv32.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto ipiv32_ = sc.get_mem<int*>(ipiv32_acc);
@@ -212,7 +212,7 @@ GETRI_LAUNCHER(std::complex<double>)
 // cusolverDnXgetrs does not use scratchpad memory
 template <typename Func, typename T>
 inline void getrs(const char* func_name, Func func, sycl::queue& queue,
-                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                  oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                   sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -237,7 +237,7 @@ inline void getrs(const char* func_name, Func func, sycl::queue& queue,
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto ipiv_acc = ipiv32.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto ipiv_ = sc.get_mem<std::int32_t*>(ipiv_acc);
@@ -250,7 +250,7 @@ inline void getrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GETRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                  \
-    void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,                \
+    void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,               \
                std::int64_t nrhs, sycl::buffer<TYPE>& a, std::int64_t lda,                      \
                sycl::buffer<std::int64_t>& ipiv, sycl::buffer<TYPE>& b, std::int64_t ldb,       \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                  \
@@ -266,8 +266,8 @@ GETRS_LAUNCHER(std::complex<double>, cusolverDnZgetrs)
 #undef GETRS_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<T_A>& a,
+inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<T_A>& a,
                   std::int64_t lda, sycl::buffer<T_B>& s, sycl::buffer<T_A>& u, std::int64_t ldu,
                   sycl::buffer<T_A>& vt, std::int64_t ldvt, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -282,7 +282,7 @@ inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto vt_acc = vt.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto s_ = sc.get_mem<cuDataType_B*>(s_acc);
@@ -301,7 +301,7 @@ inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define GESVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                        \
-    void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,         \
+    void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,       \
                std::int64_t m, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda,       \
                sycl::buffer<TYPE_B>& s, sycl::buffer<TYPE_A>& u, std::int64_t ldu,              \
                sycl::buffer<TYPE_A>& vt, std::int64_t ldvt, sycl::buffer<TYPE_A>& scratchpad,   \
@@ -318,8 +318,8 @@ GESVD_LAUNCHER(std::complex<double>, double, cusolverDnZgesvd)
 #undef GESVD_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda,
+inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda,
                   sycl::buffer<T_B>& w, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
     using cuDataType_A = typename CudaEquivalentType<T_A>::Type;
@@ -331,7 +331,7 @@ inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto w_ = sc.get_mem<cuDataType_B*>(w_acc);
@@ -346,12 +346,12 @@ inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                          \
-    void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \
-               sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& w,                \
-               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                  \
-        heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad,   \
-              scratchpad_size);                                                                   \
+#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                           \
+    void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,                \
+               std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& w, \
+               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                   \
+        heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad,    \
+              scratchpad_size);                                                                    \
     }
 
 HEEVD_LAUNCHER(std::complex<float>, float, cusolverDnCheevd)
@@ -361,7 +361,7 @@ HEEVD_LAUNCHER(std::complex<double>, double, cusolverDnZheevd)
 
 template <typename Func, typename T_A, typename T_B>
 inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                   sycl::buffer<T_A>& a, std::int64_t lda, sycl::buffer<T_A>& b, std::int64_t ldb,
                   sycl::buffer<T_B>& w, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -375,7 +375,7 @@ inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto b_ = sc.get_mem<cuDataType_A*>(b_acc);
@@ -392,8 +392,8 @@ inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int
 }
 
 #define HEGVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                           \
-    void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,                      \
-               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda,  \
+    void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,                     \
+               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda, \
                sycl::buffer<TYPE_A>& b, std::int64_t ldb, sycl::buffer<TYPE_B>& w,                 \
                sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                   \
         hegvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \
@@ -406,7 +406,7 @@ HEGVD_LAUNCHER(std::complex<double>, double, cusolverDnZhegvd)
 #undef HEGVD_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda, sycl::buffer<T_B>& d,
                   sycl::buffer<T_B>& e, sycl::buffer<T_A>& tau, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -421,7 +421,7 @@ inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType_A*>(a_acc);
             auto d_ = sc.get_mem<cuDataType_B*>(d_acc);
@@ -438,7 +438,7 @@ inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define HETRD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                          \
-    void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,                        \
+    void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,                       \
                sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& d,                \
                sycl::buffer<TYPE_B>& e, sycl::buffer<TYPE_A>& tau,                                \
                sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                  \
@@ -451,12 +451,12 @@ HETRD_LAUNCHER(std::complex<double>, double, cusolverDnZhetrd)
 
 #undef HETRD_LAUNCHER
 
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "hetrf");
 }
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
@@ -464,7 +464,7 @@ void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
 }
 
 template <typename Func, typename T>
-inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec,
+inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T>& a,
                   std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -474,7 +474,7 @@ inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -487,7 +487,7 @@ inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                   \
-    void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,    \
+    void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,   \
                std::int64_t k, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                   \
         orgbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \
@@ -509,7 +509,7 @@ inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -535,7 +535,7 @@ ORGQR_LAUNCHER(double, cusolverDnDorgqr)
 #undef ORGQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -544,7 +544,7 @@ inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -556,12 +556,12 @@ inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     });
 }
 
-#define ORGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,         \
-               std::int64_t scratchpad_size) {                                                    \
-        orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,       \
-              scratchpad_size);                                                                   \
+#define ORGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
+    void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,          \
+               std::int64_t scratchpad_size) {                                                     \
+        orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,        \
+              scratchpad_size);                                                                    \
     }
 
 ORGTR_LAUNCHER(float, cusolverDnSorgtr)
@@ -570,8 +570,8 @@ ORGTR_LAUNCHER(double, cusolverDnDorgtr)
 #undef ORGTR_LAUNCHER
 
 template <typename Func, typename T>
-inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& c, std::int64_t ldc, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -582,7 +582,7 @@ inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::read_write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -598,8 +598,8 @@ inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORMTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,                 \
-               oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,                       \
+    void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,               \
+               oneapi::math::transpose trans, std::int64_t m, std::int64_t n,                      \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,                   \
                sycl::buffer<TYPE>& c, std::int64_t ldc, sycl::buffer<TYPE>& scratchpad,            \
                std::int64_t scratchpad_size) {                                                     \
@@ -612,22 +612,22 @@ ORMTR_LAUNCHER(double, cusolverDnDormtr)
 
 #undef ORMTR_LAUNCHER
 
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "ormrq");
 }
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "ormrq");
 }
 
 template <typename Func, typename T>
-inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& c,
                   std::int64_t ldc, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -637,7 +637,7 @@ inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -652,7 +652,7 @@ inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,           \
+    void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,         \
                std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<TYPE>& a,              \
                std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& c, std::int64_t ldc, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
@@ -666,7 +666,7 @@ ORMQR_LAUNCHER(double, cusolverDnDormqr)
 #undef ORMQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -676,7 +676,7 @@ inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto devInfo_ = sc.get_mem<int*>(devInfo_acc);
@@ -689,11 +689,11 @@ inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define POTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {  \
-        potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,            \
-              scratchpad_size);                                                                   \
+#define POTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
+    void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {   \
+        potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,             \
+              scratchpad_size);                                                                    \
     }
 
 POTRF_LAUNCHER(float, cusolverDnSpotrf)
@@ -704,7 +704,7 @@ POTRF_LAUNCHER(std::complex<double>, cusolverDnZpotrf)
 #undef POTRF_LAUNCHER
 
 template <typename Func, typename T>
-inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -714,7 +714,7 @@ inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto devInfo_ = sc.get_mem<int*>(devInfo_acc);
@@ -727,11 +727,11 @@ inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define POTRI_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {  \
-        potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,            \
-              scratchpad_size);                                                                   \
+#define POTRI_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
+    void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {   \
+        potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,             \
+              scratchpad_size);                                                                    \
     }
 
 POTRI_LAUNCHER(float, cusolverDnSpotri)
@@ -743,7 +743,7 @@ POTRI_LAUNCHER(std::complex<double>, cusolverDnZpotri)
 
 // cusolverDnXpotrs does not use scratchpad memory
 template <typename Func, typename T>
-inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -752,7 +752,7 @@ inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -764,7 +764,7 @@ inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define POTRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                   \
-    void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,    \
+    void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,   \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& b, std::int64_t ldb, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                   \
         potrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb,         \
@@ -779,8 +779,8 @@ POTRS_LAUNCHER(std::complex<double>, cusolverDnZpotrs)
 #undef POTRS_LAUNCHER
 
 template <typename Func, typename T>
-inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
+inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<T>& w, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
     overflow_check(n, lda, scratchpad_size);
@@ -790,7 +790,7 @@ inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto w_ = sc.get_mem<cuDataType*>(w_acc);
@@ -805,12 +805,12 @@ inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define SYEVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \
-               sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& w,                    \
-               sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                    \
-        syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad,   \
-              scratchpad_size);                                                                   \
+#define SYEVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                  \
+    void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,             \
+               std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& w,  \
+               sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                  \
+        syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \
+              scratchpad_size);                                                                 \
     }
 
 SYEVD_LAUNCHER(float, cusolverDnSsyevd)
@@ -820,9 +820,9 @@ SYEVD_LAUNCHER(double, cusolverDnDsyevd)
 
 template <typename Func, typename T>
 inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T>& a,
-                  std::int64_t lda, sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& w,
-                  sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                  sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& b, std::int64_t ldb,
+                  sycl::buffer<T>& w, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
     overflow_check(n, lda, ldb, scratchpad_size);
     sycl::buffer<int> devInfo{ 1 };
@@ -832,7 +832,7 @@ inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto b_ = sc.get_mem<cuDataType*>(b_acc);
@@ -849,8 +849,8 @@ inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int
 }
 
 #define SYGVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,                      \
-               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda,    \
+    void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,                     \
+               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda,   \
                sycl::buffer<TYPE>& b, std::int64_t ldb, sycl::buffer<TYPE>& w,                     \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
         sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \
@@ -863,7 +863,7 @@ SYGVD_LAUNCHER(double, cusolverDnDsygvd)
 #undef SYGVD_LAUNCH
 
 template <typename Func, typename T>
-inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& d,
                   sycl::buffer<T>& e, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -877,7 +877,7 @@ inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto d_ = sc.get_mem<cuDataType*>(d_acc);
@@ -893,13 +893,13 @@ inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define SYTRD_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& d, sycl::buffer<TYPE>& e,                    \
-               sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,                           \
-               std::int64_t scratchpad_size) {                                                    \
-        sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \
-              scratchpad_size);                                                                   \
+#define SYTRD_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
+    void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& d, sycl::buffer<TYPE>& e,                     \
+               sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,                            \
+               std::int64_t scratchpad_size) {                                                     \
+        sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad,  \
+              scratchpad_size);                                                                    \
     }
 
 SYTRD_LAUNCHER(float, cusolverDnSsytrd)
@@ -908,7 +908,7 @@ SYTRD_LAUNCHER(double, cusolverDnDsytrd)
 #undef SYTRD_LAUNCHER
 
 template <typename Func, typename T>
-inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<std::int64_t>& ipiv, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -927,7 +927,7 @@ inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto ipiv32_acc = ipiv32.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto ipiv32_ = sc.get_mem<int*>(ipiv32_acc);
@@ -951,7 +951,7 @@ inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define SYTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a,  \
+    void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
                std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<TYPE>& scratchpad, \
                std::int64_t scratchpad_size) {                                                     \
         sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad,       \
@@ -965,27 +965,27 @@ SYTRF_LAUNCHER(std::complex<double>, cusolverDnZsytrf)
 
 #undef SYTRF_LAUNCHER
 
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
@@ -993,7 +993,7 @@ void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose tr
 }
 
 template <typename Func, typename T>
-inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec,
+inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T>& a,
                   std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -1003,7 +1003,7 @@ inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -1016,7 +1016,7 @@ inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                   \
-    void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,    \
+    void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,   \
                std::int64_t k, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                   \
         ungbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \
@@ -1038,7 +1038,7 @@ inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -1064,7 +1064,7 @@ UNGQR_LAUNCHER(std::complex<double>, cusolverDnZungqr)
 #undef UNGQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -1073,7 +1073,7 @@ inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -1085,12 +1085,12 @@ inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     });
 }
 
-#define UNGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                    \
-    void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,         \
-               std::int64_t scratchpad_size) {                                                    \
-        ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,       \
-              scratchpad_size);                                                                   \
+#define UNGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
+    void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,          \
+               std::int64_t scratchpad_size) {                                                     \
+        ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,        \
+              scratchpad_size);                                                                    \
     }
 
 UNGTR_LAUNCHER(std::complex<float>, cusolverDnCungtr)
@@ -1098,24 +1098,24 @@ UNGTR_LAUNCHER(std::complex<double>, cusolverDnZungtr)
 
 #undef UNGTR_LAUNCHER
 
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size) {
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+           sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "unmrq");
 }
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size) {
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+           sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "unmrq");
 }
 
 template <typename Func, typename T>
-inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& c,
                   std::int64_t ldc, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -1125,7 +1125,7 @@ inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -1140,7 +1140,7 @@ inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,           \
+    void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,         \
                std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<TYPE>& a,              \
                std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& c, std::int64_t ldc, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
@@ -1154,8 +1154,8 @@ UNMQR_LAUNCHER(std::complex<double>, cusolverDnZunmqr)
 #undef UNMQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& c, std::int64_t ldc, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -1166,7 +1166,7 @@ inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<cuDataType*>(a_acc);
             auto tau_ = sc.get_mem<cuDataType*>(tau_acc);
@@ -1182,8 +1182,8 @@ inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNMTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE)                                                     \
-    void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,                 \
-               oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,                       \
+    void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,               \
+               oneapi::math::transpose trans, std::int64_t m, std::int64_t n,                      \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,                   \
                sycl::buffer<TYPE>& c, std::int64_t ldc, sycl::buffer<TYPE>& scratchpad,            \
                std::int64_t scratchpad_size) {                                                     \
@@ -1215,7 +1215,7 @@ inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType_A*>(a);
             auto d_ = reinterpret_cast<cuDataType_B*>(d);
@@ -1280,7 +1280,7 @@ inline sycl::event geqrf(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1328,7 +1328,7 @@ inline sycl::event getrf(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto devInfo_ = reinterpret_cast<int*>(devInfo);
@@ -1391,7 +1391,7 @@ GETRI_LAUNCHER_USM(std::complex<double>)
 // cusolverDnXgetrs does not use scratchpad memory
 template <typename Func, typename T>
 inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T* a,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T* a,
                          std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1416,7 +1416,7 @@ inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
             cgh.depends_on(dependencies[i]);
         }
         cgh.depends_on(done_casting);
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto ipiv_ = reinterpret_cast<int*>(ipiv32);
@@ -1435,7 +1435,7 @@ inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GETRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
-    sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,           \
+    sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,          \
                       std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b,  \
                       std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size,           \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1452,7 +1452,7 @@ GETRS_LAUNCHER_USM(std::complex<double>, cusolverDnZgetrs)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu,
                          T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1465,7 +1465,7 @@ inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType_A*>(a);
             auto s_ = reinterpret_cast<cuDataType_B*>(s);
@@ -1486,7 +1486,7 @@ inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                      \
-    sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,    \
+    sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,  \
                       std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s,     \
                       TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt,                 \
                       TYPE_A* scratchpad, std::int64_t scratchpad_size,                           \
@@ -1504,7 +1504,7 @@ GESVD_LAUNCHER_USM(std::complex<double>, double, cusolverDnZgesvd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a,
                          std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType_A = typename CudaEquivalentType<T_A>::Type;
@@ -1516,7 +1516,7 @@ inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType_A*>(a);
             auto w_ = reinterpret_cast<cuDataType_B*>(w);
@@ -1534,7 +1534,7 @@ inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                      \
-    sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,          \
+    sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,        \
                       std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \
                       std::int64_t scratchpad_size,                                               \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1549,7 +1549,7 @@ HEEVD_LAUNCHER_USM(std::complex<double>, double, cusolverDnZheevd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a,
                          std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1562,7 +1562,7 @@ inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType_A*>(a);
             auto b_ = reinterpret_cast<cuDataType_A*>(b);
@@ -1581,8 +1581,8 @@ inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, s
 }
 
 #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                      \
-    sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,              \
-                      oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda,        \
+    sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,             \
+                      oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda,       \
                       TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad,                 \
                       std::int64_t scratchpad_size,                                               \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1597,7 +1597,7 @@ HEGVD_LAUNCHER_USM(std::complex<double>, double, cusolverDnZhegvd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d,
+                         oneapi::math::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d,
                          T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType_A = typename CudaEquivalentType<T_A>::Type;
@@ -1609,7 +1609,7 @@ inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType_A*>(a);
             auto d_ = reinterpret_cast<cuDataType_B*>(d);
@@ -1628,7 +1628,7 @@ inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE)                                   \
-    sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a,   \
+    sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a,  \
                       std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \
                       std::int64_t scratchpad_size,                                            \
                       const std::vector<sycl::event>& dependencies) {                          \
@@ -1641,13 +1641,13 @@ HETRD_LAUNCHER_USM(std::complex<double>, double, cusolverDnZhetrd)
 
 #undef HETRD_LAUNCHER_USM
 
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "hetrf");
 }
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -1656,7 +1656,7 @@ sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
 
 template <typename Func, typename T>
 inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          T* a, std::int64_t lda, T* tau, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1667,7 +1667,7 @@ inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1681,7 +1681,7 @@ inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                          \
-    sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,        \
+    sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,       \
                       std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                       \
                       const std::vector<sycl::event>& dependencies) {                       \
@@ -1706,7 +1706,7 @@ inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1734,7 +1734,7 @@ ORGQR_LAUNCHER_USM(double, cusolverDnDorgqr)
 
 template <typename Func, typename T>
 inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -1744,7 +1744,7 @@ inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1758,7 +1758,7 @@ inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                 \
-    sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,         \
+    sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,        \
                       std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \
                       const std::vector<sycl::event>& dependencies) {                              \
         return orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \
@@ -1772,8 +1772,8 @@ ORGTR_LAUNCHER_USM(double, cusolverDnDorgtr)
 
 template <typename Func, typename T>
 inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a,
                          std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1784,7 +1784,7 @@ inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1801,8 +1801,8 @@ inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
-    sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,         \
-                      oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,      \
+    sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,       \
+                      oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,     \
                       std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,   \
                       std::int64_t scratchpad_size,                                               \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1815,13 +1815,13 @@ ORMTR_LAUNCHER_USM(double, cusolverDnDormtr)
 
 #undef ORMTR_LAUNCHER_USM
 
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                   float* tau, float* c, std::int64_t ldc, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "ormrq");
 }
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                   double* tau, double* c, std::int64_t ldc, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1830,7 +1830,7 @@ sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c,
                          std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1841,7 +1841,7 @@ inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -1856,14 +1856,14 @@ inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
     return done;
 }
 
-#define ORMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                               \
-    sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,  \
-                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \
-                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                    \
-                      std::int64_t scratchpad_size,                                              \
-                      const std::vector<sycl::event>& dependencies) {                            \
-        return ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,   \
-                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                    \
+#define ORMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
+    sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \
+                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda,  \
+                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                     \
+                      std::int64_t scratchpad_size,                                               \
+                      const std::vector<sycl::event>& dependencies) {                             \
+        return ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,    \
+                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                     \
     }
 
 ORMQR_LAUNCHER_USM(float, cusolverDnSormqr)
@@ -1873,7 +1873,7 @@ ORMQR_LAUNCHER_USM(double, cusolverDnDormqr)
 
 template <typename Func, typename T>
 inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -1884,7 +1884,7 @@ inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto devInfo_ = reinterpret_cast<int*>(devInfo);
@@ -1900,7 +1900,7 @@ inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                            \
-    sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,    \
+    sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,   \
                       std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size,       \
                       const std::vector<sycl::event>& dependencies) {                         \
         return potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \
@@ -1916,7 +1916,7 @@ POTRF_LAUNCHER_USM(std::complex<double>, cusolverDnZpotrf)
 
 template <typename Func, typename T>
 inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -1927,7 +1927,7 @@ inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto scratch_ = reinterpret_cast<cuDataType*>(scratchpad);
@@ -1943,7 +1943,7 @@ inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRI_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                            \
-    sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,    \
+    sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,   \
                       std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size,       \
                       const std::vector<sycl::event>& dependencies) {                         \
         return potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \
@@ -1960,7 +1960,7 @@ POTRI_LAUNCHER_USM(std::complex<double>, cusolverDnZpotri)
 // cusolverDnXpotrs does not use scratchpad memory
 template <typename Func, typename T>
 inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
                          std::int64_t lda, T* b, std::int64_t ldb, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1971,7 +1971,7 @@ inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -1984,7 +1984,7 @@ inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                              \
-    sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,               \
+    sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,              \
                       std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb,  \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                           \
                       const std::vector<sycl::event>& dependencies) {                           \
@@ -2001,7 +2001,7 @@ POTRS_LAUNCHER_USM(std::complex<double>, cusolverDnZpotrs)
 
 template <typename Func, typename T>
 inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a,
                          std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -2012,7 +2012,7 @@ inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto w_ = reinterpret_cast<cuDataType*>(w);
@@ -2030,7 +2030,7 @@ inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define SYEVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                          \
-    sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,    \
+    sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,  \
                       std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \
                       std::int64_t scratchpad_size,                                         \
                       const std::vector<sycl::event>& dependencies) {                       \
@@ -2045,7 +2045,7 @@ SYEVD_LAUNCHER_USM(double, cusolverDnDsyevd)
 
 template <typename Func, typename T>
 inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a,
                          std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2057,7 +2057,7 @@ inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto b_ = reinterpret_cast<cuDataType*>(b);
@@ -2075,13 +2075,13 @@ inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, s
     return done;
 }
 
-#define SYGVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
-    sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,              \
-                      oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \
-                      std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size,  \
-                      const std::vector<sycl::event>& dependencies) {                             \
-        return sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \
-                     ldb, w, scratchpad, scratchpad_size, dependencies);                          \
+#define SYGVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                 \
+    sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,              \
+                      oneapi::math::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \
+                      std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size,   \
+                      const std::vector<sycl::event>& dependencies) {                              \
+        return sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b,  \
+                     ldb, w, scratchpad, scratchpad_size, dependencies);                           \
     }
 
 SYGVD_LAUNCHER_USM(float, cusolverDnSsygvd)
@@ -2091,8 +2091,8 @@ SYGVD_LAUNCHER_USM(double, cusolverDnDsygvd)
 
 template <typename Func, typename T>
 inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, T* e,
-                         T* tau, T* scratchpad, std::int64_t scratchpad_size,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d,
+                         T* e, T* tau, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
     overflow_check(n, lda, scratchpad_size);
@@ -2102,7 +2102,7 @@ inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto d_ = reinterpret_cast<cuDataType*>(d);
@@ -2121,7 +2121,7 @@ inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define SYTRD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                           \
-    sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,   \
+    sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,  \
                       std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad,       \
                       std::int64_t scratchpad_size,                                          \
                       const std::vector<sycl::event>& dependencies) {                        \
@@ -2136,7 +2136,7 @@ SYTRD_LAUNCHER_USM(double, cusolverDnDsytrd)
 
 template <typename Func, typename T>
 inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -2154,7 +2154,7 @@ inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto scratch_ = reinterpret_cast<cuDataType*>(scratchpad);
@@ -2183,13 +2183,13 @@ inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
     return done_casting;
 }
 
-#define SYTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                         \
-    sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \
-                      std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad,              \
-                      std::int64_t scratchpad_size,                                        \
-                      const std::vector<sycl::event>& dependencies) {                      \
-        return sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv,    \
-                     scratchpad, scratchpad_size, dependencies);                           \
+#define SYTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                          \
+    sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \
+                      std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad,               \
+                      std::int64_t scratchpad_size,                                         \
+                      const std::vector<sycl::event>& dependencies) {                       \
+        return sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv,     \
+                     scratchpad, scratchpad_size, dependencies);                            \
     }
 
 SYTRF_LAUNCHER_USM(float, cusolverDnSsytrf)
@@ -2199,27 +2199,27 @@ SYTRF_LAUNCHER_USM(std::complex<double>, cusolverDnZsytrf)
 
 #undef SYTRF_LAUNCHER_USM
 
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                  std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                  std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                  std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
                   std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
                   std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                   std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -2228,7 +2228,7 @@ sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          T* a, std::int64_t lda, T* tau, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2239,7 +2239,7 @@ inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -2253,7 +2253,7 @@ inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                          \
-    sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,        \
+    sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,       \
                       std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                       \
                       const std::vector<sycl::event>& dependencies) {                       \
@@ -2278,7 +2278,7 @@ inline sycl::event ungqr(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -2306,7 +2306,7 @@ UNGQR_LAUNCHER_USM(std::complex<double>, cusolverDnZungqr)
 
 template <typename Func, typename T>
 inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using cuDataType = typename CudaEquivalentType<T>::Type;
@@ -2316,7 +2316,7 @@ inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -2330,7 +2330,7 @@ inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                 \
-    sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,         \
+    sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,        \
                       std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \
                       const std::vector<sycl::event>& dependencies) {                              \
         return ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \
@@ -2342,14 +2342,14 @@ UNGTR_LAUNCHER_USM(std::complex<double>, cusolverDnZungtr)
 
 #undef UNGTR_LAUNCHER_USM
 
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
                   std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "unmrq");
 }
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* c,
                   std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -2359,7 +2359,7 @@ sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c,
                          std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2370,7 +2370,7 @@ inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -2385,14 +2385,14 @@ inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
     return done;
 }
 
-#define UNMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                               \
-    sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,  \
-                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \
-                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                    \
-                      std::int64_t scratchpad_size,                                              \
-                      const std::vector<sycl::event>& dependencies) {                            \
-        return unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,   \
-                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                    \
+#define UNMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
+    sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \
+                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda,  \
+                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                     \
+                      std::int64_t scratchpad_size,                                               \
+                      const std::vector<sycl::event>& dependencies) {                             \
+        return unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,    \
+                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                     \
     }
 
 UNMQR_LAUNCHER_USM(std::complex<float>, cusolverDnCunmqr)
@@ -2402,8 +2402,8 @@ UNMQR_LAUNCHER_USM(std::complex<double>, cusolverDnZunmqr)
 
 template <typename Func, typename T>
 inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a,
                          std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2414,7 +2414,7 @@ inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+        onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<cuDataType*>(a);
             auto tau_ = reinterpret_cast<cuDataType*>(tau);
@@ -2431,8 +2431,8 @@ inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE)                                                \
-    sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,         \
-                      oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,      \
+    sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,       \
+                      oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,     \
                       std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,   \
                       std::int64_t scratchpad_size,                                               \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -2453,7 +2453,7 @@ inline void gebrd_scratchpad_size(const char* func_name, Func func, sycl::queue&
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size);
@@ -2506,7 +2506,7 @@ inline void geqrf_scratchpad_size(const char* func_name, Func func, sycl::queue&
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda,
@@ -2535,12 +2535,12 @@ GEQRF_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZgeqrf_bufferSize)
 
 template <typename Func>
 inline void gesvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+                                  oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                   std::int64_t m, std::int64_t n, std::int64_t lda,
                                   std::int64_t ldu, std::int64_t ldvt, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size);
@@ -2552,8 +2552,8 @@ inline void gesvd_scratchpad_size(const char* func_name, Func func, sycl::queue&
 #define GESVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                            \
     template <>                                                                                   \
     std::int64_t gesvd_scratchpad_size<TYPE>(                                                     \
-        sycl::queue & queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, \
-        std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {                  \
+        sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,               \
+        std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {  \
         int scratch_size;                                                                         \
         gesvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, lda, \
                               ldu, ldvt, &scratch_size);                                          \
@@ -2573,7 +2573,7 @@ inline void getrf_scratchpad_size(const char* func_name, Func func, sycl::queue&
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda,
@@ -2617,7 +2617,7 @@ GETRI_LAUNCHER_SCRATCH(std::complex<double>)
 // cusolverDnXgetrs does not use scratchpad memory
 #define GETRS_LAUNCHER_SCRATCH(TYPE)                                                              \
     template <>                                                                                   \
-    std::int64_t getrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::transpose trans,   \
+    std::int64_t getrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::transpose trans,  \
                                              std::int64_t n, std::int64_t nrhs, std::int64_t lda, \
                                              std::int64_t ldb) {                                  \
         return 0;                                                                                 \
@@ -2632,11 +2632,11 @@ GETRS_LAUNCHER_SCRATCH(std::complex<double>)
 
 template <typename Func>
 inline void heevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                   std::int64_t lda, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz),
@@ -2649,8 +2649,8 @@ inline void heevd_scratchpad_size(const char* func_name, Func func, sycl::queue&
 
 #define HEEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                        \
     template <>                                                                               \
-    std::int64_t heevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::job jobz,      \
-                                             oneapi::mkl::uplo uplo, std::int64_t n,          \
+    std::int64_t heevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::job jobz,     \
+                                             oneapi::math::uplo uplo, std::int64_t n,         \
                                              std::int64_t lda) {                              \
         int scratch_size;                                                                     \
         heevd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, lda, \
@@ -2665,12 +2665,12 @@ HEEVD_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZheevd_bufferSize)
 
 template <typename Func>
 inline void hegvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                  std::int64_t n, std::int64_t lda, std::int64_t ldb,
-                                  int* scratch_size) {
+                                  std::int64_t itype, oneapi::math::job jobz,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  std::int64_t ldb, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype),
@@ -2684,7 +2684,7 @@ inline void hegvd_scratchpad_size(const char* func_name, Func func, sycl::queue&
 #define HEGVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                             \
     template <>                                                                                    \
     std::int64_t hegvd_scratchpad_size<TYPE>(sycl::queue & queue, std::int64_t itype,              \
-                                             oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,        \
+                                             oneapi::math::job jobz, oneapi::math::uplo uplo,      \
                                              std::int64_t n, std::int64_t lda, std::int64_t ldb) { \
         int scratch_size;                                                                          \
         hegvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n,    \
@@ -2699,11 +2699,11 @@ HEGVD_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZhegvd_bufferSize)
 
 template <typename Func>
 inline void hetrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -2714,14 +2714,14 @@ inline void hetrd_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define HETRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t hetrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        hetrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define HETRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t hetrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        hetrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 HETRD_LAUNCHER_SCRATCH(std::complex<float>, cusolverDnChetrd_bufferSize)
@@ -2730,23 +2730,24 @@ HETRD_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZhetrd_bufferSize)
 #undef HETRD_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     throw unimplemented("lapack", "hetrf_scratchpad_size");
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     throw unimplemented("lapack", "hetrf_scratchpad_size");
 }
 
 template <typename Func>
 inline void orgbr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                  oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                   std::int64_t k, std::int64_t lda, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec),
@@ -2756,15 +2757,15 @@ inline void orgbr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define ORGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                       \
-    template <>                                                                              \
-    std::int64_t orgbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::generate vec, \
-                                             std::int64_t m, std::int64_t n, std::int64_t k, \
-                                             std::int64_t lda) {                             \
-        int scratch_size;                                                                    \
-        orgbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \
-                              &scratch_size);                                                \
-        return scratch_size;                                                                 \
+#define ORGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                        \
+    template <>                                                                               \
+    std::int64_t orgbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::generate vec, \
+                                             std::int64_t m, std::int64_t n, std::int64_t k,  \
+                                             std::int64_t lda) {                              \
+        int scratch_size;                                                                     \
+        orgbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda,  \
+                              &scratch_size);                                                 \
+        return scratch_size;                                                                  \
     }
 
 ORGBR_LAUNCHER_SCRATCH(float, cusolverDnSorgbr_bufferSize)
@@ -2774,11 +2775,11 @@ ORGBR_LAUNCHER_SCRATCH(double, cusolverDnDorgbr_bufferSize)
 
 template <typename Func>
 inline void orgtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -2788,14 +2789,14 @@ inline void orgtr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define ORGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t orgtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        orgtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define ORGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t orgtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        orgtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 ORGTR_LAUNCHER_SCRATCH(float, cusolverDnSorgtr_bufferSize)
@@ -2809,7 +2810,7 @@ inline void orgqr_scratchpad_size(const char* func_name, Func func, sycl::queue&
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda,
@@ -2835,15 +2836,15 @@ ORGQR_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize)
 #undef ORGQR_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                          oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::math::side side,
+                                          oneapi::math::transpose trans, std::int64_t m,
                                           std::int64_t n, std::int64_t k, std::int64_t lda,
                                           std::int64_t ldc) {
     throw unimplemented("lapack", "ormrq_scratchpad_size");
 }
 template <>
-std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                           oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::math::side side,
+                                           oneapi::math::transpose trans, std::int64_t m,
                                            std::int64_t n, std::int64_t k, std::int64_t lda,
                                            std::int64_t ldc) {
     throw unimplemented("lapack", "ormrq_scratchpad_size");
@@ -2851,12 +2852,12 @@ std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side
 
 template <typename Func>
 inline void ormqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                  oneapi::math::side side, oneapi::math::transpose trans,
                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                   std::int64_t ldc, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side),
@@ -2867,15 +2868,15 @@ inline void ormqr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define ORMQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                            \
-    template <>                                                                                    \
-    std::int64_t ormqr_scratchpad_size<TYPE>(                                                      \
-        sycl::queue & queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, \
-        std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {                      \
-        int scratch_size;                                                                          \
-        ormqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k,    \
-                              lda, ldc, &scratch_size);                                            \
-        return scratch_size;                                                                       \
+#define ORMQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                         \
+    template <>                                                                                 \
+    std::int64_t ormqr_scratchpad_size<TYPE>(                                                   \
+        sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans,            \
+        std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {   \
+        int scratch_size;                                                                       \
+        ormqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \
+                              lda, ldc, &scratch_size);                                         \
+        return scratch_size;                                                                    \
     }
 
 ORMQRF_LAUNCHER_SCRATCH(float, cusolverDnSormqr_bufferSize)
@@ -2885,12 +2886,12 @@ ORMQRF_LAUNCHER_SCRATCH(double, cusolverDnDormqr_bufferSize)
 
 template <typename Func>
 inline void ormtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                  oneapi::math::side side, oneapi::math::uplo uplo,
+                                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                   std::int64_t lda, std::int64_t ldc, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side),
@@ -2903,10 +2904,10 @@ inline void ormtr_scratchpad_size(const char* func_name, Func func, sycl::queue&
 
 #define ORMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                             \
     template <>                                                                                    \
-    std::int64_t ormtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::side side,          \
-                                             oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, \
-                                             std::int64_t m, std::int64_t n, std::int64_t lda,     \
-                                             std::int64_t ldc) {                                   \
+    std::int64_t ormtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::side side,         \
+                                             oneapi::math::uplo uplo,                              \
+                                             oneapi::math::transpose trans, std::int64_t m,        \
+                                             std::int64_t n, std::int64_t lda, std::int64_t ldc) { \
         int scratch_size;                                                                          \
         ormtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, \
                               lda, ldc, &scratch_size);                                            \
@@ -2920,11 +2921,11 @@ ORMTR_LAUNCHER_SCRATCH(double, cusolverDnDormtr_bufferSize)
 
 template <typename Func>
 inline void potrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -2934,14 +2935,14 @@ inline void potrf_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define POTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t potrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        potrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define POTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t potrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        potrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 POTRF_LAUNCHER_SCRATCH(float, cusolverDnSpotrf_bufferSize)
@@ -2954,7 +2955,7 @@ POTRF_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZpotrf_bufferSize)
 // cusolverDnXpotrs does not use scratchpad memory
 #define POTRS_LAUNCHER_SCRATCH(TYPE)                                                              \
     template <>                                                                                   \
-    std::int64_t potrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo,         \
+    std::int64_t potrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo,        \
                                              std::int64_t n, std::int64_t nrhs, std::int64_t lda, \
                                              std::int64_t ldb) {                                  \
         return 0;                                                                                 \
@@ -2969,11 +2970,11 @@ POTRS_LAUNCHER_SCRATCH(std::complex<double>)
 
 template <typename Func>
 inline void potri_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -2983,14 +2984,14 @@ inline void potri_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define POTRI_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t potri_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        potri_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define POTRI_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t potri_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        potri_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 POTRI_LAUNCHER_SCRATCH(float, cusolverDnSpotri_bufferSize)
@@ -3002,11 +3003,11 @@ POTRI_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZpotri_bufferSize)
 
 template <typename Func>
 inline void sytrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, nullptr, lda,
@@ -3016,14 +3017,14 @@ inline void sytrf_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define SYTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t sytrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        sytrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define SYTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t sytrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        sytrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 SYTRF_LAUNCHER_SCRATCH(float, cusolverDnSsytrf_bufferSize)
@@ -3035,11 +3036,11 @@ SYTRF_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZsytrf_bufferSize)
 
 template <typename Func>
 inline void syevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                                   std::int64_t lda, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz),
@@ -3052,8 +3053,8 @@ inline void syevd_scratchpad_size(const char* func_name, Func func, sycl::queue&
 
 #define SYEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                        \
     template <>                                                                               \
-    std::int64_t syevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::job jobz,      \
-                                             oneapi::mkl::uplo uplo, std::int64_t n,          \
+    std::int64_t syevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::job jobz,     \
+                                             oneapi::math::uplo uplo, std::int64_t n,         \
                                              std::int64_t lda) {                              \
         int scratch_size;                                                                     \
         syevd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, lda, \
@@ -3068,12 +3069,12 @@ SYEVD_LAUNCHER_SCRATCH(double, cusolverDnDsyevd_bufferSize)
 
 template <typename Func>
 inline void sygvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-                                  std::int64_t n, std::int64_t lda, std::int64_t ldb,
-                                  int* scratch_size) {
+                                  std::int64_t itype, oneapi::math::job jobz,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  std::int64_t ldb, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype),
@@ -3087,7 +3088,7 @@ inline void sygvd_scratchpad_size(const char* func_name, Func func, sycl::queue&
 #define SYGVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                             \
     template <>                                                                                    \
     std::int64_t sygvd_scratchpad_size<TYPE>(sycl::queue & queue, std::int64_t itype,              \
-                                             oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,        \
+                                             oneapi::math::job jobz, oneapi::math::uplo uplo,      \
                                              std::int64_t n, std::int64_t lda, std::int64_t ldb) { \
         int scratch_size;                                                                          \
         sygvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n,    \
@@ -3102,11 +3103,11 @@ SYGVD_LAUNCHER_SCRATCH(double, cusolverDnDsygvd_bufferSize)
 
 template <typename Func>
 inline void sytrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -3117,14 +3118,14 @@ inline void sytrd_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define SYTRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t sytrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        sytrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define SYTRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t sytrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        sytrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 SYTRD_LAUNCHER_SCRATCH(float, cusolverDnSsytrd_bufferSize)
@@ -3133,31 +3134,32 @@ SYTRD_LAUNCHER_SCRATCH(double, cusolverDnDsytrd_bufferSize)
 #undef SYTRD_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t trtrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                          oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+std::int64_t trtrs_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                          oneapi::math::transpose trans, oneapi::math::diag diag,
                                           std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                           std::int64_t ldb) {
     throw unimplemented("lapack", "trtrs_scratchpad_size");
 }
 template <>
-std::int64_t trtrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                           oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+std::int64_t trtrs_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                           oneapi::math::transpose trans, oneapi::math::diag diag,
                                            std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                            std::int64_t ldb) {
     throw unimplemented("lapack", "trtrs_scratchpad_size");
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        oneapi::math::transpose trans,
+                                                        oneapi::math::diag diag, std::int64_t n,
                                                         std::int64_t nrhs, std::int64_t lda,
                                                         std::int64_t ldb) {
     throw unimplemented("lapack", "trtrs_scratchpad_size");
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo,
+                                                         oneapi::math::transpose trans,
+                                                         oneapi::math::diag diag, std::int64_t n,
                                                          std::int64_t nrhs, std::int64_t lda,
                                                          std::int64_t ldb) {
     throw unimplemented("lapack", "trtrs_scratchpad_size");
@@ -3165,11 +3167,11 @@ std::int64_t trtrs_scratchpad_size<std::complex<double>>(sycl::queue& queue, one
 
 template <typename Func>
 inline void ungbr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+                                  oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                                   std::int64_t k, std::int64_t lda, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec),
@@ -3179,15 +3181,15 @@ inline void ungbr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define UNGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                       \
-    template <>                                                                              \
-    std::int64_t ungbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::generate vec, \
-                                             std::int64_t m, std::int64_t n, std::int64_t k, \
-                                             std::int64_t lda) {                             \
-        int scratch_size;                                                                    \
-        ungbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \
-                              &scratch_size);                                                \
-        return scratch_size;                                                                 \
+#define UNGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                        \
+    template <>                                                                               \
+    std::int64_t ungbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::generate vec, \
+                                             std::int64_t m, std::int64_t n, std::int64_t k,  \
+                                             std::int64_t lda) {                              \
+        int scratch_size;                                                                     \
+        ungbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda,  \
+                              &scratch_size);                                                 \
+        return scratch_size;                                                                  \
     }
 
 UNGBR_LAUNCHER_SCRATCH(std::complex<float>, cusolverDnCungbr_bufferSize)
@@ -3201,7 +3203,7 @@ inline void ungqr_scratchpad_size(const char* func_name, Func func, sycl::queue&
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda,
@@ -3228,11 +3230,11 @@ UNGQR_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZungqr_bufferSize)
 
 template <typename Func>
 inline void ungtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+                                  oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                   int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo),
@@ -3242,14 +3244,14 @@ inline void ungtr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define UNGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                    \
-    template <>                                                                           \
-    std::int64_t ungtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        int scratch_size;                                                                 \
-        ungtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,   \
-                              &scratch_size);                                             \
-        return scratch_size;                                                              \
+#define UNGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                     \
+    template <>                                                                            \
+    std::int64_t ungtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        int scratch_size;                                                                  \
+        ungtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda,    \
+                              &scratch_size);                                              \
+        return scratch_size;                                                               \
     }
 
 UNGTR_LAUNCHER_SCRATCH(std::complex<float>, cusolverDnCungtr_bufferSize)
@@ -3258,30 +3260,28 @@ UNGTR_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZungtr_bufferSize)
 #undef UNGTR_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
     throw unimplemented("lapack", "unmrq_scratchpad_size");
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
+std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     throw unimplemented("lapack", "unmrq_scratchpad_size");
 }
 
 template <typename Func>
 inline void unmqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::side side, oneapi::mkl::transpose trans,
+                                  oneapi::math::side side, oneapi::math::transpose trans,
                                   std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
                                   std::int64_t ldc, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side),
@@ -3292,15 +3292,15 @@ inline void unmqr_scratchpad_size(const char* func_name, Func func, sycl::queue&
         .wait();
 }
 
-#define UNMQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                             \
-    template <>                                                                                    \
-    std::int64_t unmqr_scratchpad_size<TYPE>(                                                      \
-        sycl::queue & queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, \
-        std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {                      \
-        int scratch_size;                                                                          \
-        unmqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k,    \
-                              lda, ldc, &scratch_size);                                            \
-        return scratch_size;                                                                       \
+#define UNMQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                          \
+    template <>                                                                                 \
+    std::int64_t unmqr_scratchpad_size<TYPE>(                                                   \
+        sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans,            \
+        std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {   \
+        int scratch_size;                                                                       \
+        unmqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \
+                              lda, ldc, &scratch_size);                                         \
+        return scratch_size;                                                                    \
     }
 
 UNMQR_LAUNCHER_SCRATCH(std::complex<float>, cusolverDnCunmqr_bufferSize)
@@ -3310,12 +3310,12 @@ UNMQR_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZunmqr_bufferSize)
 
 template <typename Func>
 inline void unmtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue,
-                                  oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+                                  oneapi::math::side side, oneapi::math::uplo uplo,
+                                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                   std::int64_t lda, std::int64_t ldc, int* scratch_size) {
     queue
         .submit([&](sycl::handler& cgh) {
-            onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
+            onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) {
                 auto handle = sc.get_handle(queue);
                 cusolverStatus_t err;
                 CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side),
@@ -3328,10 +3328,10 @@ inline void unmtr_scratchpad_size(const char* func_name, Func func, sycl::queue&
 
 #define UNMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE)                                             \
     template <>                                                                                    \
-    std::int64_t unmtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::side side,          \
-                                             oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, \
-                                             std::int64_t m, std::int64_t n, std::int64_t lda,     \
-                                             std::int64_t ldc) {                                   \
+    std::int64_t unmtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::side side,         \
+                                             oneapi::math::uplo uplo,                              \
+                                             oneapi::math::transpose trans, std::int64_t m,        \
+                                             std::int64_t n, std::int64_t lda, std::int64_t ldc) { \
         int scratch_size;                                                                          \
         unmtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, \
                               lda, ldc, &scratch_size);                                            \
@@ -3345,5 +3345,5 @@ UNMTR_LAUNCHER_SCRATCH(std::complex<double>, cusolverDnZunmtr_bufferSize)
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp
index edd731978..af0881c10 100644
--- a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp
+++ b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp
@@ -24,7 +24,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -35,7 +35,7 @@ namespace cusolver {
  * takes place if no other element in the container has a key equivalent to
  * the one being emplaced (keys in a map container are unique).
  */
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
 thread_local cusolver_handle<ur_context_handle_t> CusolverScopedContextHandler::handle_helper =
     cusolver_handle<ur_context_handle_t>{};
 #else
@@ -98,7 +98,7 @@ cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue& q
     CUresult cuErr;
     CUcontext desired;
     CUDA_ERROR_FUNC(cuDevicePrimaryCtxRetain, cuErr, &desired, cudaDevice);
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     auto piPlacedContext_ = reinterpret_cast<ur_context_handle_t>(desired);
 #else
     auto piPlacedContext_ = reinterpret_cast<pi_context>(desired);
@@ -149,5 +149,5 @@ sycl::context CusolverScopedContextHandler::get_context(const sycl::queue& queue
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp
index 34026bf78..d6443b41a 100644
--- a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp
+++ b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp
@@ -42,8 +42,8 @@
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -52,7 +52,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -71,7 +71,7 @@ According to NVIDIA:
  using the same host thread.
 
 The advice above is for using cublas with the cuda runtime API. Given that cusolver is based on cublas the advice is 
-transferable. The cusolver_scope_handle is based on the oneMKL cublas_scope_handle. The NVIDIA runtime API creates a 
+transferable. The cusolver_scope_handle is based on the oneMath cublas_scope_handle. The NVIDIA runtime API creates a 
 default context for users. The cusolverDnCreate function in uses the context located on top of the stack for each thread. 
 Then, the cuSolver routine uses this context for resource allocation/access. Calling a cuSolver function with a handle 
 created for context A and memories/queue created for context B results in a segmentation fault. Thus we need to create 
@@ -92,7 +92,7 @@ class CusolverScopedContextHandler {
     sycl::context* placedContext_;
     bool needToRecover_;
     sycl::interop_handle& ih;
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     static thread_local cusolver_handle<ur_context_handle_t> handle_helper;
 #else
     static thread_local cusolver_handle<pi_context> handle_helper;
@@ -127,6 +127,6 @@ class CusolverScopedContextHandler {
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //_CUSOLVER_SCOPED_HANDLE_HPP_
diff --git a/src/lapack/backends/cusolver/cusolver_task.hpp b/src/lapack/backends/cusolver/cusolver_task.hpp
index 99e51d8ac..802230c9a 100644
--- a/src/lapack/backends/cusolver/cusolver_task.hpp
+++ b/src/lapack/backends/cusolver/cusolver_task.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef _MKL_LAPACK_CUSOLVER_TASK_HPP_
-#define _MKL_LAPACK_CUSOLVER_TASK_HPP_
+#ifndef ONEMATH_LAPACK_CUSOLVER_TASK_HPP_
+#define ONEMATH_LAPACK_CUSOLVER_TASK_HPP_
 #include <cuda.h>
 #include <cublas_v2.h>
 #include <cusolverDn.h>
@@ -28,14 +28,14 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "cusolver_scope_handle.hpp"
 
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -44,7 +44,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace cusolver {
 
@@ -61,12 +61,12 @@ static inline void host_task_internal(H& cgh, sycl::queue queue, F f) {
 }
 
 template <typename H, typename F>
-static inline void onemkl_cusolver_host_task(H& cgh, sycl::queue queue, F f) {
+static inline void onemath_cusolver_host_task(H& cgh, sycl::queue queue, F f) {
     (void)host_task_internal(cgh, queue, f);
 }
 
 } // namespace cusolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
-#endif // _MKL_LAPACK_CUSOLVER_TASK_HPP_
+#endif // ONEMATH_LAPACK_CUSOLVER_TASK_HPP_
diff --git a/src/lapack/backends/cusolver/cusolver_wrappers.cpp b/src/lapack/backends/cusolver/cusolver_wrappers.cpp
index 4b5ab8e2c..13db62901 100644
--- a/src/lapack/backends/cusolver/cusolver_wrappers.cpp
+++ b/src/lapack/backends/cusolver/cusolver_wrappers.cpp
@@ -17,410 +17,410 @@
 *
 **************************************************************************/
 #include "lapack/function_table.hpp"
-#include "oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hpp"
+#include "oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT lapack_function_table_t mkl_lapack_table = {
+extern "C" ONEMATH_EXPORT lapack_function_table_t onemath_lapack_table = {
     WRAPPER_VERSION,
 #define LAPACK_BACKEND cusolver
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::heevd,
-    oneapi::mkl::lapack::cusolver::heevd,
-    oneapi::mkl::lapack::cusolver::hegvd,
-    oneapi::mkl::lapack::cusolver::hegvd,
-    oneapi::mkl::lapack::cusolver::hetrd,
-    oneapi::mkl::lapack::cusolver::hetrd,
-    oneapi::mkl::lapack::cusolver::hetrf,
-    oneapi::mkl::lapack::cusolver::hetrf,
-    oneapi::mkl::lapack::cusolver::orgbr,
-    oneapi::mkl::lapack::cusolver::orgbr,
-    oneapi::mkl::lapack::cusolver::orgqr,
-    oneapi::mkl::lapack::cusolver::orgqr,
-    oneapi::mkl::lapack::cusolver::orgtr,
-    oneapi::mkl::lapack::cusolver::orgtr,
-    oneapi::mkl::lapack::cusolver::ormtr,
-    oneapi::mkl::lapack::cusolver::ormtr,
-    oneapi::mkl::lapack::cusolver::ormrq,
-    oneapi::mkl::lapack::cusolver::ormrq,
-    oneapi::mkl::lapack::cusolver::ormqr,
-    oneapi::mkl::lapack::cusolver::ormqr,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::syevd,
-    oneapi::mkl::lapack::cusolver::syevd,
-    oneapi::mkl::lapack::cusolver::sygvd,
-    oneapi::mkl::lapack::cusolver::sygvd,
-    oneapi::mkl::lapack::cusolver::sytrd,
-    oneapi::mkl::lapack::cusolver::sytrd,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::ungbr,
-    oneapi::mkl::lapack::cusolver::ungbr,
-    oneapi::mkl::lapack::cusolver::ungqr,
-    oneapi::mkl::lapack::cusolver::ungqr,
-    oneapi::mkl::lapack::cusolver::ungtr,
-    oneapi::mkl::lapack::cusolver::ungtr,
-    oneapi::mkl::lapack::cusolver::unmrq,
-    oneapi::mkl::lapack::cusolver::unmrq,
-    oneapi::mkl::lapack::cusolver::unmqr,
-    oneapi::mkl::lapack::cusolver::unmqr,
-    oneapi::mkl::lapack::cusolver::unmtr,
-    oneapi::mkl::lapack::cusolver::unmtr,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gebrd,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::gerqf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::geqrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getrf,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getri,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::getrs,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::gesvd,
-    oneapi::mkl::lapack::cusolver::heevd,
-    oneapi::mkl::lapack::cusolver::heevd,
-    oneapi::mkl::lapack::cusolver::hegvd,
-    oneapi::mkl::lapack::cusolver::hegvd,
-    oneapi::mkl::lapack::cusolver::hetrd,
-    oneapi::mkl::lapack::cusolver::hetrd,
-    oneapi::mkl::lapack::cusolver::hetrf,
-    oneapi::mkl::lapack::cusolver::hetrf,
-    oneapi::mkl::lapack::cusolver::orgbr,
-    oneapi::mkl::lapack::cusolver::orgbr,
-    oneapi::mkl::lapack::cusolver::orgqr,
-    oneapi::mkl::lapack::cusolver::orgqr,
-    oneapi::mkl::lapack::cusolver::orgtr,
-    oneapi::mkl::lapack::cusolver::orgtr,
-    oneapi::mkl::lapack::cusolver::ormtr,
-    oneapi::mkl::lapack::cusolver::ormtr,
-    oneapi::mkl::lapack::cusolver::ormrq,
-    oneapi::mkl::lapack::cusolver::ormrq,
-    oneapi::mkl::lapack::cusolver::ormqr,
-    oneapi::mkl::lapack::cusolver::ormqr,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potrf,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potri,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::potrs,
-    oneapi::mkl::lapack::cusolver::syevd,
-    oneapi::mkl::lapack::cusolver::syevd,
-    oneapi::mkl::lapack::cusolver::sygvd,
-    oneapi::mkl::lapack::cusolver::sygvd,
-    oneapi::mkl::lapack::cusolver::sytrd,
-    oneapi::mkl::lapack::cusolver::sytrd,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::sytrf,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::trtrs,
-    oneapi::mkl::lapack::cusolver::ungbr,
-    oneapi::mkl::lapack::cusolver::ungbr,
-    oneapi::mkl::lapack::cusolver::ungqr,
-    oneapi::mkl::lapack::cusolver::ungqr,
-    oneapi::mkl::lapack::cusolver::ungtr,
-    oneapi::mkl::lapack::cusolver::ungtr,
-    oneapi::mkl::lapack::cusolver::unmrq,
-    oneapi::mkl::lapack::cusolver::unmrq,
-    oneapi::mkl::lapack::cusolver::unmqr,
-    oneapi::mkl::lapack::cusolver::unmqr,
-    oneapi::mkl::lapack::cusolver::unmtr,
-    oneapi::mkl::lapack::cusolver::unmtr,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::geqrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getrf_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getri_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::getrs_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::orgqr_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrf_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::potrs_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::ungqr_batch,
-    oneapi::mkl::lapack::cusolver::gebrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::gebrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::gebrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::gebrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::gerqf_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::gerqf_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::gerqf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::gerqf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::geqrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::geqrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::geqrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::geqrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::gesvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::gesvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::gesvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::gesvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getri_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getri_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::heevd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::heevd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::hegvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::hegvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::hetrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::hetrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::hetrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::hetrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::orgbr_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::orgbr_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::orgtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::orgtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::orgqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::orgqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::ormrq_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::ormrq_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::ormqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::ormqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::ormtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::ormtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::potrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::potri_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potri_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::sytrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::sytrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::sytrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::sytrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::syevd_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::syevd_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::sygvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::sygvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::sytrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::sytrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::trtrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::trtrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::trtrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::trtrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::ungbr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::ungbr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::ungqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::ungqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::ungtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::ungtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::unmrq_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::unmrq_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::unmqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::unmqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::unmtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::unmtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<double>>
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::heevd,
+    oneapi::math::lapack::cusolver::heevd,
+    oneapi::math::lapack::cusolver::hegvd,
+    oneapi::math::lapack::cusolver::hegvd,
+    oneapi::math::lapack::cusolver::hetrd,
+    oneapi::math::lapack::cusolver::hetrd,
+    oneapi::math::lapack::cusolver::hetrf,
+    oneapi::math::lapack::cusolver::hetrf,
+    oneapi::math::lapack::cusolver::orgbr,
+    oneapi::math::lapack::cusolver::orgbr,
+    oneapi::math::lapack::cusolver::orgqr,
+    oneapi::math::lapack::cusolver::orgqr,
+    oneapi::math::lapack::cusolver::orgtr,
+    oneapi::math::lapack::cusolver::orgtr,
+    oneapi::math::lapack::cusolver::ormtr,
+    oneapi::math::lapack::cusolver::ormtr,
+    oneapi::math::lapack::cusolver::ormrq,
+    oneapi::math::lapack::cusolver::ormrq,
+    oneapi::math::lapack::cusolver::ormqr,
+    oneapi::math::lapack::cusolver::ormqr,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::syevd,
+    oneapi::math::lapack::cusolver::syevd,
+    oneapi::math::lapack::cusolver::sygvd,
+    oneapi::math::lapack::cusolver::sygvd,
+    oneapi::math::lapack::cusolver::sytrd,
+    oneapi::math::lapack::cusolver::sytrd,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::ungbr,
+    oneapi::math::lapack::cusolver::ungbr,
+    oneapi::math::lapack::cusolver::ungqr,
+    oneapi::math::lapack::cusolver::ungqr,
+    oneapi::math::lapack::cusolver::ungtr,
+    oneapi::math::lapack::cusolver::ungtr,
+    oneapi::math::lapack::cusolver::unmrq,
+    oneapi::math::lapack::cusolver::unmrq,
+    oneapi::math::lapack::cusolver::unmqr,
+    oneapi::math::lapack::cusolver::unmqr,
+    oneapi::math::lapack::cusolver::unmtr,
+    oneapi::math::lapack::cusolver::unmtr,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gebrd,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::gerqf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::geqrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getrf,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getri,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::getrs,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::gesvd,
+    oneapi::math::lapack::cusolver::heevd,
+    oneapi::math::lapack::cusolver::heevd,
+    oneapi::math::lapack::cusolver::hegvd,
+    oneapi::math::lapack::cusolver::hegvd,
+    oneapi::math::lapack::cusolver::hetrd,
+    oneapi::math::lapack::cusolver::hetrd,
+    oneapi::math::lapack::cusolver::hetrf,
+    oneapi::math::lapack::cusolver::hetrf,
+    oneapi::math::lapack::cusolver::orgbr,
+    oneapi::math::lapack::cusolver::orgbr,
+    oneapi::math::lapack::cusolver::orgqr,
+    oneapi::math::lapack::cusolver::orgqr,
+    oneapi::math::lapack::cusolver::orgtr,
+    oneapi::math::lapack::cusolver::orgtr,
+    oneapi::math::lapack::cusolver::ormtr,
+    oneapi::math::lapack::cusolver::ormtr,
+    oneapi::math::lapack::cusolver::ormrq,
+    oneapi::math::lapack::cusolver::ormrq,
+    oneapi::math::lapack::cusolver::ormqr,
+    oneapi::math::lapack::cusolver::ormqr,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potrf,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potri,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::potrs,
+    oneapi::math::lapack::cusolver::syevd,
+    oneapi::math::lapack::cusolver::syevd,
+    oneapi::math::lapack::cusolver::sygvd,
+    oneapi::math::lapack::cusolver::sygvd,
+    oneapi::math::lapack::cusolver::sytrd,
+    oneapi::math::lapack::cusolver::sytrd,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::sytrf,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::trtrs,
+    oneapi::math::lapack::cusolver::ungbr,
+    oneapi::math::lapack::cusolver::ungbr,
+    oneapi::math::lapack::cusolver::ungqr,
+    oneapi::math::lapack::cusolver::ungqr,
+    oneapi::math::lapack::cusolver::ungtr,
+    oneapi::math::lapack::cusolver::ungtr,
+    oneapi::math::lapack::cusolver::unmrq,
+    oneapi::math::lapack::cusolver::unmrq,
+    oneapi::math::lapack::cusolver::unmqr,
+    oneapi::math::lapack::cusolver::unmqr,
+    oneapi::math::lapack::cusolver::unmtr,
+    oneapi::math::lapack::cusolver::unmtr,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::geqrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getrf_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getri_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::getrs_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::orgqr_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrf_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::potrs_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::ungqr_batch,
+    oneapi::math::lapack::cusolver::gebrd_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::gebrd_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::gebrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::gebrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::gerqf_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::gerqf_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::gerqf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::gerqf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::geqrf_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::geqrf_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::geqrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::geqrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::gesvd_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::gesvd_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::gesvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::gesvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrf_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrf_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getri_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getri_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrs_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrs_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::heevd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::heevd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::hegvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::hegvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::hetrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::hetrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::hetrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::hetrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::orgbr_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::orgbr_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::orgtr_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::orgtr_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::orgqr_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::orgqr_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::ormrq_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::ormrq_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::ormqr_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::ormqr_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::ormtr_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::ormtr_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrf_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrf_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::potrs_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrs_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::potri_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potri_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::sytrf_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::sytrf_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::sytrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::sytrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::syevd_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::syevd_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::sygvd_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::sygvd_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::sytrd_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::sytrd_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::trtrs_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::trtrs_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::trtrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::trtrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::ungbr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::ungbr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::ungqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::ungqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::ungtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::ungtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::unmrq_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::unmrq_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::unmqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::unmqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::unmtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::unmtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size<std::complex<double>>
 #undef LAPACK_BACKEND
 };
diff --git a/src/lapack/backends/mkl_common/lapack_wrappers.cxx b/src/lapack/backends/mkl_common/lapack_wrappers.cxx
index a80f807b6..fa2069035 100644
--- a/src/lapack/backends/mkl_common/lapack_wrappers.cxx
+++ b/src/lapack/backends/mkl_common/lapack_wrappers.cxx
@@ -17,315 +17,315 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-oneapi::mkl::lapack::LAPACK_BACKEND::gebrd, oneapi::mkl::lapack::LAPACK_BACKEND::gebrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd, oneapi::mkl::lapack::LAPACK_BACKEND::gebrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf, oneapi::mkl::lapack::LAPACK_BACKEND::gerqf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf, oneapi::mkl::lapack::LAPACK_BACKEND::gerqf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf, oneapi::mkl::lapack::LAPACK_BACKEND::geqrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf, oneapi::mkl::lapack::LAPACK_BACKEND::geqrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf, oneapi::mkl::lapack::LAPACK_BACKEND::getrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf, oneapi::mkl::lapack::LAPACK_BACKEND::getrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri, oneapi::mkl::lapack::LAPACK_BACKEND::getri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri, oneapi::mkl::lapack::LAPACK_BACKEND::getri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs, oneapi::mkl::lapack::LAPACK_BACKEND::getrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs, oneapi::mkl::lapack::LAPACK_BACKEND::getrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd, oneapi::mkl::lapack::LAPACK_BACKEND::gesvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd, oneapi::mkl::lapack::LAPACK_BACKEND::gesvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd, oneapi::mkl::lapack::LAPACK_BACKEND::heevd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd, oneapi::mkl::lapack::LAPACK_BACKEND::hegvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd, oneapi::mkl::lapack::LAPACK_BACKEND::hetrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf, oneapi::mkl::lapack::LAPACK_BACKEND::hetrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr, oneapi::mkl::lapack::LAPACK_BACKEND::orgbr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr, oneapi::mkl::lapack::LAPACK_BACKEND::orgqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr, oneapi::mkl::lapack::LAPACK_BACKEND::orgtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr, oneapi::mkl::lapack::LAPACK_BACKEND::ormtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq, oneapi::mkl::lapack::LAPACK_BACKEND::ormrq,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr, oneapi::mkl::lapack::LAPACK_BACKEND::ormqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf, oneapi::mkl::lapack::LAPACK_BACKEND::potrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf, oneapi::mkl::lapack::LAPACK_BACKEND::potrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri, oneapi::mkl::lapack::LAPACK_BACKEND::potri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri, oneapi::mkl::lapack::LAPACK_BACKEND::potri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs, oneapi::mkl::lapack::LAPACK_BACKEND::potrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs, oneapi::mkl::lapack::LAPACK_BACKEND::potrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd, oneapi::mkl::lapack::LAPACK_BACKEND::syevd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd, oneapi::mkl::lapack::LAPACK_BACKEND::sygvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd, oneapi::mkl::lapack::LAPACK_BACKEND::sytrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf, oneapi::mkl::lapack::LAPACK_BACKEND::sytrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf, oneapi::mkl::lapack::LAPACK_BACKEND::sytrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs, oneapi::mkl::lapack::LAPACK_BACKEND::trtrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs, oneapi::mkl::lapack::LAPACK_BACKEND::trtrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr, oneapi::mkl::lapack::LAPACK_BACKEND::ungbr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr, oneapi::mkl::lapack::LAPACK_BACKEND::ungqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr, oneapi::mkl::lapack::LAPACK_BACKEND::ungtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq, oneapi::mkl::lapack::LAPACK_BACKEND::unmrq,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr, oneapi::mkl::lapack::LAPACK_BACKEND::unmqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr, oneapi::mkl::lapack::LAPACK_BACKEND::unmtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd, oneapi::mkl::lapack::LAPACK_BACKEND::gebrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd, oneapi::mkl::lapack::LAPACK_BACKEND::gebrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf, oneapi::mkl::lapack::LAPACK_BACKEND::gerqf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf, oneapi::mkl::lapack::LAPACK_BACKEND::gerqf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf, oneapi::mkl::lapack::LAPACK_BACKEND::geqrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf, oneapi::mkl::lapack::LAPACK_BACKEND::geqrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf, oneapi::mkl::lapack::LAPACK_BACKEND::getrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf, oneapi::mkl::lapack::LAPACK_BACKEND::getrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri, oneapi::mkl::lapack::LAPACK_BACKEND::getri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri, oneapi::mkl::lapack::LAPACK_BACKEND::getri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs, oneapi::mkl::lapack::LAPACK_BACKEND::getrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs, oneapi::mkl::lapack::LAPACK_BACKEND::getrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd, oneapi::mkl::lapack::LAPACK_BACKEND::gesvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd, oneapi::mkl::lapack::LAPACK_BACKEND::gesvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd, oneapi::mkl::lapack::LAPACK_BACKEND::heevd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd, oneapi::mkl::lapack::LAPACK_BACKEND::hegvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd, oneapi::mkl::lapack::LAPACK_BACKEND::hetrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf, oneapi::mkl::lapack::LAPACK_BACKEND::hetrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr, oneapi::mkl::lapack::LAPACK_BACKEND::orgbr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr, oneapi::mkl::lapack::LAPACK_BACKEND::orgqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr, oneapi::mkl::lapack::LAPACK_BACKEND::orgtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr, oneapi::mkl::lapack::LAPACK_BACKEND::ormtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq, oneapi::mkl::lapack::LAPACK_BACKEND::ormrq,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr, oneapi::mkl::lapack::LAPACK_BACKEND::ormqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf, oneapi::mkl::lapack::LAPACK_BACKEND::potrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf, oneapi::mkl::lapack::LAPACK_BACKEND::potrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri, oneapi::mkl::lapack::LAPACK_BACKEND::potri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri, oneapi::mkl::lapack::LAPACK_BACKEND::potri,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs, oneapi::mkl::lapack::LAPACK_BACKEND::potrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs, oneapi::mkl::lapack::LAPACK_BACKEND::potrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd, oneapi::mkl::lapack::LAPACK_BACKEND::syevd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd, oneapi::mkl::lapack::LAPACK_BACKEND::sygvd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd, oneapi::mkl::lapack::LAPACK_BACKEND::sytrd,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf, oneapi::mkl::lapack::LAPACK_BACKEND::sytrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf, oneapi::mkl::lapack::LAPACK_BACKEND::sytrf,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs, oneapi::mkl::lapack::LAPACK_BACKEND::trtrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs, oneapi::mkl::lapack::LAPACK_BACKEND::trtrs,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr, oneapi::mkl::lapack::LAPACK_BACKEND::ungbr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr, oneapi::mkl::lapack::LAPACK_BACKEND::ungqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr, oneapi::mkl::lapack::LAPACK_BACKEND::ungtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq, oneapi::mkl::lapack::LAPACK_BACKEND::unmrq,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr, oneapi::mkl::lapack::LAPACK_BACKEND::unmqr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr, oneapi::mkl::lapack::LAPACK_BACKEND::unmtr,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::heevd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::syevd_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<double>>
+oneapi::math::lapack::LAPACK_BACKEND::gebrd, oneapi::math::lapack::LAPACK_BACKEND::gebrd,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd, oneapi::math::lapack::LAPACK_BACKEND::gebrd,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf, oneapi::math::lapack::LAPACK_BACKEND::gerqf,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf, oneapi::math::lapack::LAPACK_BACKEND::gerqf,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf, oneapi::math::lapack::LAPACK_BACKEND::geqrf,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf, oneapi::math::lapack::LAPACK_BACKEND::geqrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf, oneapi::math::lapack::LAPACK_BACKEND::getrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf, oneapi::math::lapack::LAPACK_BACKEND::getrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getri, oneapi::math::lapack::LAPACK_BACKEND::getri,
+    oneapi::math::lapack::LAPACK_BACKEND::getri, oneapi::math::lapack::LAPACK_BACKEND::getri,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs, oneapi::math::lapack::LAPACK_BACKEND::getrs,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs, oneapi::math::lapack::LAPACK_BACKEND::getrs,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd, oneapi::math::lapack::LAPACK_BACKEND::gesvd,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd, oneapi::math::lapack::LAPACK_BACKEND::gesvd,
+    oneapi::math::lapack::LAPACK_BACKEND::heevd, oneapi::math::lapack::LAPACK_BACKEND::heevd,
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd, oneapi::math::lapack::LAPACK_BACKEND::hegvd,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd, oneapi::math::lapack::LAPACK_BACKEND::hetrd,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf, oneapi::math::lapack::LAPACK_BACKEND::hetrf,
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr, oneapi::math::lapack::LAPACK_BACKEND::orgbr,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr, oneapi::math::lapack::LAPACK_BACKEND::orgqr,
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr, oneapi::math::lapack::LAPACK_BACKEND::orgtr,
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr, oneapi::math::lapack::LAPACK_BACKEND::ormtr,
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq, oneapi::math::lapack::LAPACK_BACKEND::ormrq,
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr, oneapi::math::lapack::LAPACK_BACKEND::ormqr,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf, oneapi::math::lapack::LAPACK_BACKEND::potrf,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf, oneapi::math::lapack::LAPACK_BACKEND::potrf,
+    oneapi::math::lapack::LAPACK_BACKEND::potri, oneapi::math::lapack::LAPACK_BACKEND::potri,
+    oneapi::math::lapack::LAPACK_BACKEND::potri, oneapi::math::lapack::LAPACK_BACKEND::potri,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs, oneapi::math::lapack::LAPACK_BACKEND::potrs,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs, oneapi::math::lapack::LAPACK_BACKEND::potrs,
+    oneapi::math::lapack::LAPACK_BACKEND::syevd, oneapi::math::lapack::LAPACK_BACKEND::syevd,
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd, oneapi::math::lapack::LAPACK_BACKEND::sygvd,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd, oneapi::math::lapack::LAPACK_BACKEND::sytrd,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf, oneapi::math::lapack::LAPACK_BACKEND::sytrf,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf, oneapi::math::lapack::LAPACK_BACKEND::sytrf,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs, oneapi::math::lapack::LAPACK_BACKEND::trtrs,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs, oneapi::math::lapack::LAPACK_BACKEND::trtrs,
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr, oneapi::math::lapack::LAPACK_BACKEND::ungbr,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr, oneapi::math::lapack::LAPACK_BACKEND::ungqr,
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr, oneapi::math::lapack::LAPACK_BACKEND::ungtr,
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq, oneapi::math::lapack::LAPACK_BACKEND::unmrq,
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr, oneapi::math::lapack::LAPACK_BACKEND::unmqr,
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr, oneapi::math::lapack::LAPACK_BACKEND::unmtr,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd, oneapi::math::lapack::LAPACK_BACKEND::gebrd,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd, oneapi::math::lapack::LAPACK_BACKEND::gebrd,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf, oneapi::math::lapack::LAPACK_BACKEND::gerqf,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf, oneapi::math::lapack::LAPACK_BACKEND::gerqf,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf, oneapi::math::lapack::LAPACK_BACKEND::geqrf,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf, oneapi::math::lapack::LAPACK_BACKEND::geqrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf, oneapi::math::lapack::LAPACK_BACKEND::getrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf, oneapi::math::lapack::LAPACK_BACKEND::getrf,
+    oneapi::math::lapack::LAPACK_BACKEND::getri, oneapi::math::lapack::LAPACK_BACKEND::getri,
+    oneapi::math::lapack::LAPACK_BACKEND::getri, oneapi::math::lapack::LAPACK_BACKEND::getri,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs, oneapi::math::lapack::LAPACK_BACKEND::getrs,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs, oneapi::math::lapack::LAPACK_BACKEND::getrs,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd, oneapi::math::lapack::LAPACK_BACKEND::gesvd,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd, oneapi::math::lapack::LAPACK_BACKEND::gesvd,
+    oneapi::math::lapack::LAPACK_BACKEND::heevd, oneapi::math::lapack::LAPACK_BACKEND::heevd,
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd, oneapi::math::lapack::LAPACK_BACKEND::hegvd,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd, oneapi::math::lapack::LAPACK_BACKEND::hetrd,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf, oneapi::math::lapack::LAPACK_BACKEND::hetrf,
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr, oneapi::math::lapack::LAPACK_BACKEND::orgbr,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr, oneapi::math::lapack::LAPACK_BACKEND::orgqr,
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr, oneapi::math::lapack::LAPACK_BACKEND::orgtr,
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr, oneapi::math::lapack::LAPACK_BACKEND::ormtr,
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq, oneapi::math::lapack::LAPACK_BACKEND::ormrq,
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr, oneapi::math::lapack::LAPACK_BACKEND::ormqr,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf, oneapi::math::lapack::LAPACK_BACKEND::potrf,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf, oneapi::math::lapack::LAPACK_BACKEND::potrf,
+    oneapi::math::lapack::LAPACK_BACKEND::potri, oneapi::math::lapack::LAPACK_BACKEND::potri,
+    oneapi::math::lapack::LAPACK_BACKEND::potri, oneapi::math::lapack::LAPACK_BACKEND::potri,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs, oneapi::math::lapack::LAPACK_BACKEND::potrs,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs, oneapi::math::lapack::LAPACK_BACKEND::potrs,
+    oneapi::math::lapack::LAPACK_BACKEND::syevd, oneapi::math::lapack::LAPACK_BACKEND::syevd,
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd, oneapi::math::lapack::LAPACK_BACKEND::sygvd,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd, oneapi::math::lapack::LAPACK_BACKEND::sytrd,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf, oneapi::math::lapack::LAPACK_BACKEND::sytrf,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf, oneapi::math::lapack::LAPACK_BACKEND::sytrf,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs, oneapi::math::lapack::LAPACK_BACKEND::trtrs,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs, oneapi::math::lapack::LAPACK_BACKEND::trtrs,
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr, oneapi::math::lapack::LAPACK_BACKEND::ungbr,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr, oneapi::math::lapack::LAPACK_BACKEND::ungqr,
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr, oneapi::math::lapack::LAPACK_BACKEND::ungtr,
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq, oneapi::math::lapack::LAPACK_BACKEND::unmrq,
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr, oneapi::math::lapack::LAPACK_BACKEND::unmqr,
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr, oneapi::math::lapack::LAPACK_BACKEND::unmtr,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::gesvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::heevd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::heevd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hegvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::hetrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgbr_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgtr_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormrq_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormqr_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::ormtr_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::syevd_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::syevd_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::sygvd_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::sytrd_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::trtrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungbr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmrq_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::unmtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size<std::complex<double>>
diff --git a/src/lapack/backends/mkl_common/mkl_lapack.cxx b/src/lapack/backends/mkl_common/mkl_lapack.cxx
index 055531f7c..6c440b1c6 100644
--- a/src/lapack/backends/mkl_common/mkl_lapack.cxx
+++ b/src/lapack/backends/mkl_common/mkl_lapack.cxx
@@ -21,2339 +21,2647 @@ void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std:
            std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
            sycl::buffer<std::complex<float>>& tauq, sycl::buffer<std::complex<float>>& taup,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup,
+                                                           scratchpad, scratchpad_size));
 }
 void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& d, sycl::buffer<double>& e,
            sycl::buffer<double>& tauq, sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup,
+                                                           scratchpad, scratchpad_size));
 }
 void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
            sycl::buffer<float>& tauq, sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup,
+                                                           scratchpad, scratchpad_size));
 }
 void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tauq,
            sycl::buffer<std::complex<double>>& taup, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup,
+                                                           scratchpad, scratchpad_size));
 }
 void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size));
 }
 void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
 void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size));
 }
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<double>& b, std::int64_t ldb, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size));
 }
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size));
 }
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
            std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                 scratchpad_size);
-}
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
-           sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt, std::int64_t ldvt,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad,
-                                 scratchpad_size);
-}
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
-           sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt, std::int64_t ldvt,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size));
+}
+void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+           std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+           sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
+           sycl::buffer<double>& vt, std::int64_t ldvt, sycl::buffer<double>& scratchpad,
+           std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size));
+}
+void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+           std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+           sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
+           sycl::buffer<float>& vt, std::int64_t ldvt, sycl::buffer<float>& scratchpad,
+           std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size));
 }
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+           std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
            sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size));
 }
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
+           std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
            sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size));
 }
-void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& w,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz),
+                                                           detail::get_onemkl_uplo(uplo), n, a, lda,
+                                                           w, scratchpad, scratchpad_size));
 }
-void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& w,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz),
+                                                           detail::get_onemkl_uplo(uplo), n, a, lda,
+                                                           w, scratchpad, scratchpad_size));
 }
-void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
            std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb, sycl::buffer<float>& w,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size));
 }
-void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
            std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb, sycl::buffer<double>& w,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size));
 }
-void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& d,
            sycl::buffer<float>& e, sycl::buffer<std::complex<float>>& tau,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size));
 }
-void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tau,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size));
 }
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec),
+                                                           m, n, k, a, lda, tau, scratchpad,
+                                                           scratchpad_size));
 }
-void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec),
+                                                           m, n, k, a, lda, tau, scratchpad,
+                                                           scratchpad_size));
 }
 void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size));
 }
 void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size));
 }
-void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size));
 }
-void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size));
 }
-void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
+void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad,
+        scratchpad_size));
 }
-void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
+void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad,
+        scratchpad_size));
 }
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           a, lda, scratchpad, scratchpad_size));
 }
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           nrhs, a, lda, b, ldb, scratchpad,
+                                                           scratchpad_size));
 }
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           nrhs, a, lda, b, ldb, scratchpad,
+                                                           scratchpad_size));
 }
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           nrhs, a, lda, b, ldb, scratchpad,
+                                                           scratchpad_size));
 }
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n,
+                                                           nrhs, a, lda, b, ldb, scratchpad,
+                                                           scratchpad_size));
 }
-void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& w,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz),
+                                                           detail::get_onemkl_uplo(uplo), n, a, lda,
+                                                           w, scratchpad, scratchpad_size));
 }
-void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& w,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz),
+                                                           detail::get_onemkl_uplo(uplo), n, a, lda,
+                                                           w, scratchpad, scratchpad_size));
 }
-void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
            std::int64_t n, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
            std::int64_t ldb, sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size));
 }
-void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo,
            std::int64_t n, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
            std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size));
 }
-void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& d, sycl::buffer<double>& e,
            sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size));
 }
-void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
            sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size));
 }
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size));
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size));
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size));
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size));
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size));
 }
-void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec),
+                                                           m, n, k, a, lda, tau, scratchpad,
+                                                           scratchpad_size));
 }
-void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec),
+                                                           m, n, k, a, lda, tau, scratchpad,
+                                                           scratchpad_size));
 }
 void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size));
 }
 void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size));
 }
-void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size));
 }
-void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size));
 }
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
-}
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
-}
-void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+           sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
+}
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+           sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+           sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
+}
+void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+           sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size));
 }
-void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad,
+        scratchpad_size));
 }
-void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad,
-                                 scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad,
+        scratchpad_size));
 }
 sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                   std::int64_t lda, float* d, float* e, std::complex<float>* tauq,
                   std::complex<float>* taup, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
                   double* d, double* e, double* tauq, double* taup, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
                   float* d, float* e, float* tauq, float* taup, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                   std::int64_t lda, double* d, double* e, std::complex<double>* tauq,
                   std::complex<double>* taup, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(
+        queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
                   float* tau, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
                   double* tau, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
                   double* tau, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
                   float* tau, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                   std::int64_t lda, std::int64_t* ipiv, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(
+        queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
                   std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(
+        queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
                   std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(
+        queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                   std::int64_t lda, std::int64_t* ipiv, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(
+        queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::int64_t* ipiv, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
                   std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
                   std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
 sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   std::int64_t* ipiv, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                   std::int64_t nrhs, std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* b, std::int64_t ldb, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                   std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b,
                   std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                   std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b,
                   std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                   std::int64_t nrhs, std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* b, std::int64_t ldb, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda,
+                                     ipiv, b, ldb, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                   std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
                   std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                   std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
                   std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                   std::int64_t m, std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   float* s, std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
                   std::int64_t ldvt, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                   std::int64_t m, std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   double* s, std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
                   std::int64_t ldvt, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s,
+        u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, float* w,
+sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                  std::int64_t n, std::complex<float>* a, std::int64_t lda, float* w,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, double* w,
+sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                  std::int64_t n, std::complex<double>* a, std::int64_t lda, double* w,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
+sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* b, std::int64_t ldb, float* w,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                  std::complex<double>* b, std::int64_t ldb, double* w,
+sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                  std::int64_t lda, std::complex<double>* b, std::int64_t ldb, double* w,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, float* d, float* e,
                   std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, d, e, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, double* d, double* e,
                   std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, d, e, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                   std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau,
+                                     scratchpad, scratchpad_size, dependencies));
 }
-sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                   std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau,
+                                     scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a,
                   std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr(
+        queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a,
                   std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr(
+        queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float* a,
+sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float* a,
                   std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double* a,
+sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double* a,
                   std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                   float* tau, float* c, std::int64_t ldc, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                   double* tau, double* c, std::int64_t ldc, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                   double* tau, double* c, std::int64_t ldc, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                   float* tau, float* c, std::int64_t ldc, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                   float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, nrhs, a, lda, b, ldb, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                   double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, nrhs, a, lda, b, ldb, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                   std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, nrhs, a, lda, b, ldb, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                   std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, nrhs, a, lda, b, ldb, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  double* a, std::int64_t lda, double* w, double* scratchpad,
+sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                  std::int64_t n, double* a, std::int64_t lda, double* w, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  float* a, std::int64_t lda, float* w, float* scratchpad,
+sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
+                  std::int64_t n, float* a, std::int64_t lda, float* w, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b,
+sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b,
                   std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b,
+sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b,
                   std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b,
+        ldb, w, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, double* d, double* e, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, d, e, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, float* d, float* e, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, d, e, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, ipiv, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                  std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                  std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                  std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
                   std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
                   std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                   std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                   std::int64_t k, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau,
+                                     scratchpad, scratchpad_size, dependencies));
 }
-sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
+sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,
                   std::int64_t k, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad,
-                                        scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau,
+                                     scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr(
+        queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr(
+        queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size,
-                                        dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo),
+                                                               n, a, lda, tau, scratchpad,
+                                                               scratchpad_size, dependencies));
 }
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
                   std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* c,
                   std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
                   std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* c,
                   std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda,
+        tau, c, ldc, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* c, std::int64_t ldc, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size,
+        dependencies));
 }
-sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* c, std::int64_t ldc, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc,
-                                        scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size,
+        dependencies));
 }
 void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
                  std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(
+        queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size));
 }
 void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
                  std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(
+        queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size));
 }
 void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(
+        queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size));
 }
 void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(
+        queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size));
 }
 void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(
+        queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(
+        queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, std::int64_t batch_size,
                  sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(
+        queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, std::int64_t batch_size,
                  sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(
+        queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv, sycl::buffer<float>& b,
                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                  sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv,
-                                       b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size));
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv,
-                                       b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size));
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv,
-                                       b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size));
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv,
-                                       b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size));
 }
 void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(
+        queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, std::int64_t batch_size,
                  sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(
+        queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(
+        queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size,
-                                       scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(
+        queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size));
 }
 void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<float>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a,
+                                                                 tau, stride_tau, batch_size,
+                                                                 scratchpad, scratchpad_size));
 }
 void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<double>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a,
+                                                                 tau, stride_tau, batch_size,
+                                                                 scratchpad, scratchpad_size));
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                 std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                 sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
-                                       scratchpad_size);
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                 sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                 std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                 std::int64_t scratchpad_size) {
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda,
+                                           stride_a, batch_size, scratchpad, scratchpad_size));
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
-                                       scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda,
+                                           stride_a, batch_size, scratchpad, scratchpad_size));
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
-                                       scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda,
+                                           stride_a, batch_size, scratchpad, scratchpad_size));
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad,
-                                       scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(
+        ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda,
+                                           stride_a, batch_size, scratchpad, scratchpad_size));
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size));
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size));
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size));
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size));
 }
 void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a,
+                                                                 tau, stride_tau, batch_size,
+                                                                 scratchpad, scratchpad_size));
 }
 void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
-    ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                       batch_size, scratchpad, scratchpad_size);
+    RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a,
+                                                                 tau, stride_tau, batch_size,
+                                                                 scratchpad, scratchpad_size));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
                         std::int64_t lda, std::int64_t stride_a, float* tau,
                         std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
                         std::int64_t lda, std::int64_t stride_a, double* tau,
                         std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                         std::int64_t lda, std::int64_t stride_a, std::complex<float>* tau,
                         std::int64_t stride_tau, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                         std::int64_t lda, std::int64_t stride_a, std::complex<double>* tau,
                         std::int64_t stride_tau, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a,
                         std::int64_t* lda, float** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a,
                         std::int64_t* lda, double** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                         std::complex<float>** a, std::int64_t* lda, std::complex<float>** tau,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                         std::complex<double>** a, std::int64_t* lda, std::complex<double>** tau,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a,
                         std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a,
                         std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                         std::complex<float>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
                         std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
                         std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double>** a,
                         std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size, float* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv,
-                                              stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
-                                              scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size, double* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv,
-                                              stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
-                                              scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv,
-                                              stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
-                                              scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv,
-                                              stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
-                                              scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b,
+        ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv,
                         float** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                              group_count, group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count,
+        group_sizes, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv,
                         double** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                              group_count, group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count,
+        group_sizes, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<float>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::complex<float>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                              group_count, group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count,
+        group_sizes, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::complex<double>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
-                                              group_count, group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count,
+        group_sizes, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                         float* a, std::int64_t lda, std::int64_t stride_a, float* tau,
                         std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                         double* a, std::int64_t lda, std::int64_t stride_a, double* tau,
                         std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                         float** a, std::int64_t* lda, float** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                         double** a, std::int64_t* lda, double** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, float** a,
                         std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, double** a,
                         std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::complex<float>** a, std::int64_t* lda, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::complex<double>** a, std::int64_t* lda, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad,
+        scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a,
                         float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
-                                              stride_b, batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a,
                         double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
-                                              stride_b, batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
-                                              stride_b, batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb,
-                                              stride_b, batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b,
+        batch_size, scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::int64_t* nrhs, float** a, std::int64_t* lda, float** b,
                         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
-                                              group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::int64_t* nrhs, double** a, std::int64_t* lda, double** b,
                         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
-                                              group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<float>** a, std::int64_t* lda,
                         std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
-                                              group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes,
+        scratchpad, scratchpad_size, dependencies));
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda,
                         std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count,
-                                              group_sizes, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes,
+        scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                         std::complex<float>* tau, std::int64_t stride_tau, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                         std::complex<double>* tau, std::int64_t stride_tau, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
-                                              batch_size, scratchpad, scratchpad_size,
-                                              dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau,
+                                           batch_size, scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                         std::complex<float>** a, std::int64_t* lda, std::complex<float>** tau,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
                         std::complex<double>** a, std::int64_t* lda, std::complex<double>** tau,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
-    return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
-                                              scratchpad, scratchpad_size, dependencies);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes,
+                                           scratchpad, scratchpad_size, dependencies));
 }
 
 template <>
 std::int64_t gebrd_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gebrd_scratchpad_size<float>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gebrd_scratchpad_size<float>(queue, m, n, lda));
 }
 template <>
 std::int64_t gebrd_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gebrd_scratchpad_size<double>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gebrd_scratchpad_size<double>(queue, m, n, lda));
 }
 template <>
 std::int64_t gebrd_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<float>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<float>>(queue, m, n, lda));
 }
 template <>
 std::int64_t gebrd_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<double>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<double>>(queue, m, n, lda));
 }
 template <>
 std::int64_t gerqf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gerqf_scratchpad_size<float>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gerqf_scratchpad_size<float>(queue, m, n, lda));
 }
 template <>
 std::int64_t gerqf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gerqf_scratchpad_size<double>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gerqf_scratchpad_size<double>(queue, m, n, lda));
 }
 template <>
 std::int64_t gerqf_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gerqf_scratchpad_size<std::complex<float>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gerqf_scratchpad_size<std::complex<float>>(queue, m, n, lda));
 }
 template <>
 std::int64_t gerqf_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::gerqf_scratchpad_size<std::complex<double>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gerqf_scratchpad_size<std::complex<double>>(queue, m, n, lda));
 }
 template <>
 std::int64_t geqrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::geqrf_scratchpad_size<float>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_scratchpad_size<float>(queue, m, n, lda));
 }
 template <>
 std::int64_t geqrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::geqrf_scratchpad_size<double>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_scratchpad_size<double>(queue, m, n, lda));
 }
 template <>
 std::int64_t geqrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<float>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<float>>(queue, m, n, lda));
 }
 template <>
 std::int64_t geqrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<double>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<double>>(queue, m, n, lda));
 }
 template <>
-std::int64_t gesvd_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                          oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                          std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {
-    return ::oneapi::mkl::lapack::gesvd_scratchpad_size<float>(queue, jobu, jobvt, m, n, lda, ldu,
-                                                               ldvt);
+std::int64_t gesvd_scratchpad_size<float>(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                          oneapi::math::jobsvd jobvt, std::int64_t m,
+                                          std::int64_t n, std::int64_t lda, std::int64_t ldu,
+                                          std::int64_t ldvt) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size<float>(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu,
+        ldvt));
 }
 template <>
-std::int64_t gesvd_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                           oneapi::mkl::jobsvd jobvt, std::int64_t m,
+std::int64_t gesvd_scratchpad_size<double>(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                           oneapi::math::jobsvd jobvt, std::int64_t m,
                                            std::int64_t n, std::int64_t lda, std::int64_t ldu,
                                            std::int64_t ldvt) {
-    return ::oneapi::mkl::lapack::gesvd_scratchpad_size<double>(queue, jobu, jobvt, m, n, lda, ldu,
-                                                                ldvt);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size<double>(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu,
+        ldvt));
 }
 template <>
 std::int64_t gesvd_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                        oneapi::mkl::jobsvd jobu,
-                                                        oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                                                        oneapi::math::jobsvd jobu,
+                                                        oneapi::math::jobsvd jobvt, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda,
                                                         std::int64_t ldu, std::int64_t ldvt) {
-    return ::oneapi::mkl::lapack::gesvd_scratchpad_size<std::complex<float>>(queue, jobu, jobvt, m,
-                                                                             n, lda, ldu, ldvt);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu,
+        ldvt));
 }
 template <>
 std::int64_t gesvd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                         oneapi::mkl::jobsvd jobu,
-                                                         oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                                                         oneapi::math::jobsvd jobu,
+                                                         oneapi::math::jobsvd jobvt, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda,
                                                          std::int64_t ldu, std::int64_t ldvt) {
-    return ::oneapi::mkl::lapack::gesvd_scratchpad_size<std::complex<double>>(queue, jobu, jobvt, m,
-                                                                              n, lda, ldu, ldvt);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::gesvd_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda,
+            ldu, ldvt));
 }
 template <>
 std::int64_t getrf_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getrf_scratchpad_size<float>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_scratchpad_size<float>(queue, m, n, lda));
 }
 template <>
 std::int64_t getrf_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getrf_scratchpad_size<double>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_scratchpad_size<double>(queue, m, n, lda));
 }
 template <>
 std::int64_t getrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<float>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<float>>(queue, m, n, lda));
 }
 template <>
 std::int64_t getrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<double>>(queue, m, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<double>>(queue, m, n, lda));
 }
 template <>
 std::int64_t getri_scratchpad_size<float>(sycl::queue& queue, std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getri_scratchpad_size<float>(queue, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_scratchpad_size<float>(queue, n, lda));
 }
 template <>
 std::int64_t getri_scratchpad_size<double>(sycl::queue& queue, std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getri_scratchpad_size<double>(queue, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_scratchpad_size<double>(queue, n, lda));
 }
 template <>
 std::int64_t getri_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t n,
                                                         std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getri_scratchpad_size<std::complex<float>>(queue, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_scratchpad_size<std::complex<float>>(queue, n, lda));
 }
 template <>
 std::int64_t getri_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t n,
                                                          std::int64_t lda) {
-    return ::oneapi::mkl::lapack::getri_scratchpad_size<std::complex<double>>(queue, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_scratchpad_size<std::complex<double>>(queue, n, lda));
 }
 template <>
-std::int64_t getrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_scratchpad_size<float>(sycl::queue& queue, oneapi::math::transpose trans,
                                           std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                           std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::getrs_scratchpad_size<float>(queue, trans, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size<float>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t getrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_scratchpad_size<double>(sycl::queue& queue, oneapi::math::transpose trans,
                                            std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                            std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::getrs_scratchpad_size<double>(queue, trans, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size<double>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb));
 }
 template <>
 std::int64_t getrs_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                        oneapi::mkl::transpose trans,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t n, std::int64_t nrhs,
                                                         std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<float>>(queue, trans, n, nrhs,
-                                                                             lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb));
 }
 template <>
 std::int64_t getrs_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                         oneapi::mkl::transpose trans,
+                                                         oneapi::math::transpose trans,
                                                          std::int64_t n, std::int64_t nrhs,
                                                          std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<double>>(queue, trans, n, nrhs,
-                                                                              lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t heevd_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t heevd_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::job jobz,
+                                                        oneapi::math::uplo uplo, std::int64_t n,
                                                         std::int64_t lda) {
-    return ::oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<float>>(queue, jobz, uplo, n,
-                                                                             lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t heevd_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t heevd_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
                                                          std::int64_t lda) {
-    return ::oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<double>>(queue, jobz, uplo, n,
-                                                                              lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
 std::int64_t hegvd_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t itype,
-                                                        oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
+                                                        oneapi::math::job jobz,
+                                                        oneapi::math::uplo uplo, std::int64_t n,
                                                         std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::hegvd_scratchpad_size<std::complex<float>>(queue, itype, jobz,
-                                                                             uplo, n, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd_scratchpad_size<std::complex<float>>(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb));
 }
 template <>
 std::int64_t hegvd_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t itype,
-                                                         oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
+                                                         oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
                                                          std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::hegvd_scratchpad_size<std::complex<double>>(queue, itype, jobz,
-                                                                              uplo, n, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::hegvd_scratchpad_size<std::complex<double>>(
+            queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda,
+            ldb));
 }
 template <>
-std::int64_t hetrd_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrd_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::hetrd_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t hetrd_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::hetrd_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t hetrd_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::hetrd_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::hetrf_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::hetrf_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::hetrf_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t orgbr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::generate vect,
+std::int64_t orgbr_scratchpad_size<float>(sycl::queue& queue, oneapi::math::generate vect,
                                           std::int64_t m, std::int64_t n, std::int64_t k,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgbr_scratchpad_size<float>(queue, vect, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size<float>(
+        queue, detail::get_onemkl_generate(vect), m, n, k, lda));
 }
 template <>
-std::int64_t orgbr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::generate vect,
+std::int64_t orgbr_scratchpad_size<double>(sycl::queue& queue, oneapi::math::generate vect,
                                            std::int64_t m, std::int64_t n, std::int64_t k,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgbr_scratchpad_size<double>(queue, vect, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size<double>(
+        queue, detail::get_onemkl_generate(vect), m, n, k, lda));
 }
 template <>
-std::int64_t orgtr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t orgtr_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgtr_scratchpad_size<float>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t orgtr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t orgtr_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgtr_scratchpad_size<double>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
 std::int64_t orgqr_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                           std::int64_t k, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgqr_scratchpad_size<float>(queue, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_scratchpad_size<float>(queue, m, n, k, lda));
 }
 template <>
 std::int64_t orgqr_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                            std::int64_t k, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::orgqr_scratchpad_size<double>(queue, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::orgqr_scratchpad_size<double>(queue, m, n, k, lda));
 }
 template <>
-std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                          oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::math::side side,
+                                          oneapi::math::transpose trans, std::int64_t m,
                                           std::int64_t n, std::int64_t k, std::int64_t lda,
                                           std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormrq_scratchpad_size<float>(queue, side, trans, m, n, k, lda,
-                                                               ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size<float>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                           oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::math::side side,
+                                           oneapi::math::transpose trans, std::int64_t m,
                                            std::int64_t n, std::int64_t k, std::int64_t lda,
                                            std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormrq_scratchpad_size<double>(queue, side, trans, m, n, k, lda,
-                                                                ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size<double>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t ormqr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                          oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormqr_scratchpad_size<float>(sycl::queue& queue, oneapi::math::side side,
+                                          oneapi::math::transpose trans, std::int64_t m,
                                           std::int64_t n, std::int64_t k, std::int64_t lda,
                                           std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormqr_scratchpad_size<float>(queue, side, trans, m, n, k, lda,
-                                                               ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size<float>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t ormqr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                           oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormqr_scratchpad_size<double>(sycl::queue& queue, oneapi::math::side side,
+                                           oneapi::math::transpose trans, std::int64_t m,
                                            std::int64_t n, std::int64_t k, std::int64_t lda,
                                            std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormqr_scratchpad_size<double>(queue, side, trans, m, n, k, lda,
-                                                                ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size<double>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t ormtr_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                          oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+std::int64_t ormtr_scratchpad_size<float>(sycl::queue& queue, oneapi::math::side side,
+                                          oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                           std::int64_t m, std::int64_t n, std::int64_t lda,
                                           std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormtr_scratchpad_size<float>(queue, side, uplo, trans, m, n, lda,
-                                                               ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size<float>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, lda, ldc));
 }
 template <>
-std::int64_t ormtr_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+std::int64_t ormtr_scratchpad_size<double>(sycl::queue& queue, oneapi::math::side side,
+                                           oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                            std::int64_t m, std::int64_t n, std::int64_t lda,
                                            std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::ormtr_scratchpad_size<double>(queue, side, uplo, trans, m, n, lda,
-                                                                ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size<double>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, lda, ldc));
 }
 template <>
-std::int64_t potrf_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potrf_scratchpad_size<float>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potrf_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potrf_scratchpad_size<double>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t potrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                           std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::potrs_scratchpad_size<float>(queue, uplo, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t potrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                            std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::potrs_scratchpad_size<double>(queue, uplo, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t potrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t nrhs,
                                                         std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<float>>(queue, uplo, n, nrhs,
-                                                                             lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t potrs_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<double>>(queue, uplo, n, nrhs,
-                                                                              lda, ldb);
+std::int64_t potrs_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t nrhs, std::int64_t lda,
+                                                         std::int64_t ldb) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t potri_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potri_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potri_scratchpad_size<float>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potri_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potri_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potri_scratchpad_size<double>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potri_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potri_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potri_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t potri_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::potri_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t potri_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potri_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t sytrf_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrf_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrf_scratchpad_size<float>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t sytrf_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrf_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrf_scratchpad_size<double>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t sytrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrf_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t sytrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrf_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t sytrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::sytrf_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t syevd_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t syevd_scratchpad_size<float>(sycl::queue& queue, oneapi::math::job jobz,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
-    return ::oneapi::mkl::lapack::syevd_scratchpad_size<float>(queue, jobz, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size<float>(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t syevd_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::job jobz,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t syevd_scratchpad_size<double>(sycl::queue& queue, oneapi::math::job jobz,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
-    return ::oneapi::mkl::lapack::syevd_scratchpad_size<double>(queue, jobz, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size<double>(
+        queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
 std::int64_t sygvd_scratchpad_size<float>(sycl::queue& queue, std::int64_t itype,
-                                          oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                          oneapi::math::job jobz, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::sygvd_scratchpad_size<float>(queue, itype, jobz, uplo, n, lda,
-                                                               ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size<float>(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb));
 }
 template <>
 std::int64_t sygvd_scratchpad_size<double>(sycl::queue& queue, std::int64_t itype,
-                                           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                           oneapi::math::job jobz, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda, std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::sygvd_scratchpad_size<double>(queue, itype, jobz, uplo, n, lda,
-                                                                ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size<double>(
+        queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb));
 }
 template <>
-std::int64_t sytrd_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrd_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrd_scratchpad_size<float>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t sytrd_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrd_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::sytrd_scratchpad_size<double>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t trtrs_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                          oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+std::int64_t trtrs_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                          oneapi::math::transpose trans, oneapi::math::diag diag,
                                           std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                           std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::trtrs_scratchpad_size<float>(queue, uplo, trans, diag, n, nrhs,
-                                                               lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t trtrs_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                           oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+std::int64_t trtrs_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                           oneapi::math::transpose trans, oneapi::math::diag diag,
                                            std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                            std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::trtrs_scratchpad_size<double>(queue, uplo, trans, diag, n, nrhs,
-                                                                lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        oneapi::math::transpose trans,
+                                                        oneapi::math::diag diag, std::int64_t n,
                                                         std::int64_t nrhs, std::int64_t lda,
                                                         std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::trtrs_scratchpad_size<std::complex<float>>(
-        queue, uplo, trans, diag, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+        detail::get_onemkl_diag(diag), n, nrhs, lda, ldb));
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo,
+                                                         oneapi::math::transpose trans,
+                                                         oneapi::math::diag diag, std::int64_t n,
                                                          std::int64_t nrhs, std::int64_t lda,
                                                          std::int64_t ldb) {
-    return ::oneapi::mkl::lapack::trtrs_scratchpad_size<std::complex<double>>(
-        queue, uplo, trans, diag, n, nrhs, lda, ldb);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::trtrs_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans),
+            detail::get_onemkl_diag(diag), n, nrhs, lda, ldb));
 }
 template <>
 std::int64_t ungbr_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                        oneapi::mkl::generate vect, std::int64_t m,
+                                                        oneapi::math::generate vect, std::int64_t m,
                                                         std::int64_t n, std::int64_t k,
                                                         std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungbr_scratchpad_size<std::complex<float>>(queue, vect, m, n, k,
-                                                                             lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_generate(vect), m, n, k, lda));
 }
 template <>
 std::int64_t ungbr_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                         oneapi::mkl::generate vect, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungbr_scratchpad_size<std::complex<double>>(queue, vect, m, n, k,
-                                                                              lda);
+                                                         oneapi::math::generate vect,
+                                                         std::int64_t m, std::int64_t n,
+                                                         std::int64_t k, std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungbr_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_generate(vect), m, n, k, lda));
 }
 template <>
 std::int64_t ungqr_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t k,
                                                         std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungqr_scratchpad_size<std::complex<float>>(queue, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_scratchpad_size<std::complex<float>>(queue, m, n, k, lda));
 }
 template <>
 std::int64_t ungqr_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t k,
                                                          std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungqr_scratchpad_size<std::complex<double>>(queue, m, n, k, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_scratchpad_size<std::complex<double>>(queue, m, n, k, lda));
 }
 template <>
-std::int64_t ungtr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t ungtr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungtr_scratchpad_size<std::complex<float>>(queue, uplo, n, lda);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t ungtr_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
-    return ::oneapi::mkl::lapack::ungtr_scratchpad_size<std::complex<double>>(queue, uplo, n, lda);
+std::int64_t ungtr_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungtr_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda));
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmrq_scratchpad_size<std::complex<float>>(queue, side, trans, m,
-                                                                             n, k, lda, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmrq_scratchpad_size<std::complex<double>>(queue, side, trans, m,
-                                                                              n, k, lda, ldc);
+std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::unmrq_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+            ldc));
 }
 template <>
-std::int64_t unmqr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmqr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmqr_scratchpad_size<std::complex<float>>(queue, side, trans, m,
-                                                                             n, k, lda, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+        ldc));
 }
 template <>
-std::int64_t unmqr_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmqr_scratchpad_size<std::complex<double>>(queue, side, trans, m,
-                                                                              n, k, lda, ldc);
+std::int64_t unmqr_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::unmqr_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda,
+            ldc));
 }
 template <>
-std::int64_t unmtr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmtr_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::uplo uplo,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t lda, std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmtr_scratchpad_size<std::complex<float>>(queue, side, uplo,
-                                                                             trans, m, n, lda, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr_scratchpad_size<std::complex<float>>(
+        queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+        detail::get_onemkl_transpose(trans), m, n, lda, ldc));
 }
 template <>
-std::int64_t unmtr_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
+std::int64_t unmtr_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::side side,
+                                                         oneapi::math::uplo uplo,
+                                                         oneapi::math::transpose trans,
                                                          std::int64_t m, std::int64_t n,
                                                          std::int64_t lda, std::int64_t ldc) {
-    return ::oneapi::mkl::lapack::unmtr_scratchpad_size<std::complex<double>>(
-        queue, side, uplo, trans, m, n, lda, ldc);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::unmtr_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo),
+            detail::get_onemkl_transpose(trans), m, n, lda, ldc));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t lda, std::int64_t stride_a,
                                                 std::int64_t stride_ipiv, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<float>(queue, m, n, lda, stride_a,
-                                                                     stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size<float>(
+        queue, m, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_ipiv,
                                                  std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<double>(queue, m, n, lda, stride_a,
-                                                                      stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size<double>(
+        queue, m, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
@@ -2361,8 +2669,9 @@ std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue
                                                               std::int64_t stride_a,
                                                               std::int64_t stride_ipiv,
                                                               std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, lda, stride_a, stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
@@ -2370,23 +2679,24 @@ std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
                                                                std::int64_t stride_a,
                                                                std::int64_t stride_ipiv,
                                                                std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, lda, stride_a, stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t n,
                                                 std::int64_t lda, std::int64_t stride_a,
                                                 std::int64_t stride_ipiv, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<float>(queue, n, lda, stride_a,
-                                                                     stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size<float>(
+        queue, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t n,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_ipiv,
                                                  std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<double>(queue, n, lda, stride_a,
-                                                                      stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size<double>(
+        queue, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t n,
@@ -2394,8 +2704,9 @@ std::int64_t getri_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue
                                                               std::int64_t stride_a,
                                                               std::int64_t stride_ipiv,
                                                               std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<float>>(
-        queue, n, lda, stride_a, stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<float>>(
+            queue, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t n,
@@ -2403,56 +2714,63 @@ std::int64_t getri_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
                                                                std::int64_t stride_a,
                                                                std::int64_t stride_ipiv,
                                                                std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<double>>(
-        queue, n, lda, stride_a, stride_ipiv, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<double>>(
+            queue, n, lda, stride_a, stride_ipiv, batch_size));
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_ipiv,
                                                 std::int64_t ldb, std::int64_t stride_b,
                                                 std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<float>(
-        queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb,
+        stride_b, batch_size));
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t n, std::int64_t nrhs,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_ipiv, std::int64_t ldb,
                                                  std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<double>(
-        queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb,
+        stride_b, batch_size));
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<float>>(
-        queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb,
+            stride_b, batch_size));
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<double>>(
-        queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb,
+            stride_b, batch_size));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t lda, std::int64_t stride_a,
                                                 std::int64_t stride_tau, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<float>(queue, m, n, lda, stride_a,
-                                                                     stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<float>(
+        queue, m, n, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_tau, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<double>(queue, m, n, lda, stride_a,
-                                                                      stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<double>(
+        queue, m, n, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t m,
@@ -2460,8 +2778,9 @@ std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue
                                                               std::int64_t stride_a,
                                                               std::int64_t stride_tau,
                                                               std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, lda, stride_a, stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t m,
@@ -2469,309 +2788,334 @@ std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
                                                                std::int64_t stride_a,
                                                                std::int64_t stride_tau,
                                                                std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, lda, stride_a, stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, lda, stride_a, stride_tau, batch_size));
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<float>(queue, uplo, n, lda, stride_a,
-                                                                     batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size));
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                  std::int64_t n, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<double>(queue, uplo, n, lda, stride_a,
-                                                                      batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size));
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                              oneapi::mkl::uplo uplo,
+                                                              oneapi::math::uplo uplo,
                                                               std::int64_t n, std::int64_t lda,
                                                               std::int64_t stride_a,
                                                               std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<float>>(
-        queue, uplo, n, lda, stride_a, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size));
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                               oneapi::mkl::uplo uplo,
+                                                               oneapi::math::uplo uplo,
                                                                std::int64_t n, std::int64_t lda,
                                                                std::int64_t stride_a,
                                                                std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<double>>(
-        queue, uplo, n, lda, stride_a, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size));
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t ldb,
                                                 std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<float>(
-        queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size));
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                  std::int64_t n, std::int64_t nrhs,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t ldb, std::int64_t stride_b,
                                                  std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<double>(
-        queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size));
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<float>>(
-        queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b,
+            batch_size));
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<double>>(
-        queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size) {
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b,
+            batch_size));
 }
 template <>
 std::int64_t orgqr_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t k, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_tau,
                                                 std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<float>(queue, m, n, k, lda, stride_a,
-                                                                     stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<float>(
+        queue, m, n, k, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t orgqr_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                  std::int64_t k, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t stride_tau,
                                                  std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<double>(queue, m, n, k, lda, stride_a,
-                                                                      stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<double>(
+        queue, m, n, k, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
     sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
     std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, k, lda, stride_a, stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, k, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
     sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
     std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
-    return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, k, lda, stride_a, stride_tau, batch_size);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, k, lda, stride_a, stride_tau, batch_size));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
                                                 std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<float>(queue, m, n, lda, group_count,
-                                                                     group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size<float>(
+        queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
                                                  std::int64_t* n, std::int64_t* lda,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<double>(queue, m, n, lda, group_count,
-                                                                      group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size<double>(
+        queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t* m,
                                                               std::int64_t* n, std::int64_t* lda,
                                                               std::int64_t group_count,
                                                               std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t* m,
                                                                std::int64_t* n, std::int64_t* lda,
                                                                std::int64_t group_count,
                                                                std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* n,
                                                 std::int64_t* lda, std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<float>(queue, n, lda, group_count,
-                                                                     group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size<float>(
+        queue, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* n,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<double>(queue, n, lda, group_count,
-                                                                      group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size<double>(
+        queue, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t* n,
                                                               std::int64_t* lda,
                                                               std::int64_t group_count,
                                                               std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<float>>(
-        queue, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<float>>(
+            queue, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t* n,
                                                                std::int64_t* lda,
                                                                std::int64_t group_count,
                                                                std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<double>>(
-        queue, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getri_batch_scratchpad_size<std::complex<double>>(
+            queue, n, lda, group_count, group_sizes));
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::transpose* trans,
+std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::transpose* trans,
                                                 std::int64_t* n, std::int64_t* nrhs,
                                                 std::int64_t* lda, std::int64_t* ldb,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<float>(queue, trans, n, nrhs, lda,
-                                                                     ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::transpose* trans,
+std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::transpose* trans,
                                                  std::int64_t* n, std::int64_t* nrhs,
                                                  std::int64_t* lda, std::int64_t* ldb,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<double>(
-        queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<float>>(
-        queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count,
+            group_sizes));
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<double>>(
-        queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count,
+            group_sizes));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
                                                 std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<float>(queue, m, n, lda, group_count,
-                                                                     group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<float>(
+        queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
                                                  std::int64_t* n, std::int64_t* lda,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<double>(queue, m, n, lda, group_count,
-                                                                      group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<double>(
+        queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t* m,
                                                               std::int64_t* n, std::int64_t* lda,
                                                               std::int64_t group_count,
                                                               std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t* m,
                                                                std::int64_t* n, std::int64_t* lda,
                                                                std::int64_t group_count,
                                                                std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t orgqr_batch_scratchpad_size<float>(sycl::queue& queue, std::int64_t* m,
                                                 std::int64_t* n, std::int64_t* k, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<float>(queue, m, n, k, lda,
-                                                                     group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<float>(
+        queue, m, n, k, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t orgqr_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_t* m,
                                                  std::int64_t* n, std::int64_t* k,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<double>(queue, m, n, k, lda,
-                                                                      group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size<double>(
+        queue, m, n, k, lda, group_count, group_sizes));
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                 std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<float>(queue, uplo, n, lda,
-                                                                     group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes));
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                  std::int64_t* n, std::int64_t* lda,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<double>(queue, uplo, n, lda,
-                                                                      group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                              oneapi::mkl::uplo* uplo,
+                                                              oneapi::math::uplo* uplo,
                                                               std::int64_t* n, std::int64_t* lda,
                                                               std::int64_t group_count,
                                                               std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<float>>(
-        queue, uplo, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                               oneapi::mkl::uplo* uplo,
+                                                               oneapi::math::uplo* uplo,
                                                                std::int64_t* n, std::int64_t* lda,
                                                                std::int64_t group_count,
                                                                std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<double>>(
-        queue, uplo, n, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrf_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes));
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                 std::int64_t* n, std::int64_t* nrhs,
                                                 std::int64_t* lda, std::int64_t* ldb,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<float>(queue, uplo, n, nrhs, lda, ldb,
-                                                                     group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size<float>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                  std::int64_t* n, std::int64_t* nrhs,
                                                  std::int64_t* lda, std::int64_t* ldb,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<double>(
-        queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size<double>(
+        queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<float>>(
-        queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<float>>(
+            queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<double>>(
-        queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::potrs_batch_scratchpad_size<std::complex<double>>(
+            queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes));
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue, std::int64_t* m,
@@ -2779,8 +3123,9 @@ std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue
                                                               std::int64_t* lda,
                                                               std::int64_t group_count,
                                                               std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<float>>(
-        queue, m, n, k, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<float>>(
+            queue, m, n, k, lda, group_count, group_sizes));
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue, std::int64_t* m,
@@ -2788,6 +3133,7 @@ std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
                                                                std::int64_t* lda,
                                                                std::int64_t group_count,
                                                                std::int64_t* group_sizes) {
-    return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<double>>(
-        queue, m, n, k, lda, group_count, group_sizes);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size<std::complex<double>>(
+            queue, m, n, k, lda, group_count, group_sizes));
 }
diff --git a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp b/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp
deleted file mode 100644
index 992659a67..000000000
--- a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp
+++ /dev/null
@@ -1,1276 +0,0 @@
-/*******************************************************************************
-* Copyright 2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-#pragma once
-
-#include <complex>
-
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-
-namespace oneapi {
-namespace mkl {
-namespace lapack {
-
-void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-           std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-           sycl::buffer<std::complex<float>>& tauq, sycl::buffer<std::complex<float>>& taup,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& d, sycl::buffer<double>& e,
-           sycl::buffer<double>& tauq, sycl::buffer<double>& taup, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-           sycl::buffer<float>& tauq, sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
-           sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tauq,
-           sycl::buffer<std::complex<double>>& taup, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<double>& b, std::int64_t ldb, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
-           std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& s,
-           sycl::buffer<double>& u, std::int64_t ldu, sycl::buffer<double>& vt, std::int64_t ldvt,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& s,
-           sycl::buffer<float>& u, std::int64_t ldu, sycl::buffer<float>& vt, std::int64_t ldvt,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<float>& s, sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
-           sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
-           std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<double>& s, sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
-           sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& w,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& w,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-           std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& b, std::int64_t ldb, sycl::buffer<float>& w,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-           std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& b, std::int64_t ldb, sycl::buffer<double>& w,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& d,
-           sycl::buffer<float>& e, sycl::buffer<std::complex<float>>& tau,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
-           sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tau,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-           std::int64_t k, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-           std::int64_t k, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-           sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-           sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-           sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
-           sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& w,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& w,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-           std::int64_t n, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
-           std::int64_t ldb, sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-           std::int64_t n, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
-           std::int64_t ldb, sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& d, sycl::buffer<double>& e,
-           sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
-           sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
-           std::int64_t scratchpad_size);
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
-           std::int64_t scratchpad_size);
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
-           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
-           std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
-           std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
-           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-           std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-           std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size);
-void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size);
-void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<float>& tau,
-                 std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                 std::int64_t scratchpad_size);
-void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<double>& tau,
-                 std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                 std::int64_t scratchpad_size);
-void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
-                 std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
-                 std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<float>>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                 std::int64_t stride_ipiv, std::int64_t batch_size,
-                 sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size);
-void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                 std::int64_t stride_ipiv, std::int64_t batch_size,
-                 sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size);
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                 std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv, sycl::buffer<float>& b,
-                 std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                 sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                 std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
-                 std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                 std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-                 std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                 std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-                 std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                 std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
-                 std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
-                 std::int64_t stride_ipiv, std::int64_t batch_size,
-                 sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                 sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                 sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<float>& tau, std::int64_t stride_tau, std::int64_t batch_size,
-                 sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                 sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<double>& tau, std::int64_t stride_tau, std::int64_t batch_size,
-                 sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                 std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                 sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                 sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                 std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                 sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                 sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                 sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<float>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                 sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<double>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                 sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                 sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                 sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
-                 std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
-                 std::int64_t scratchpad_size);
-void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                 sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
-                 sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
-                 std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
-                 std::int64_t scratchpad_size);
-sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                  std::int64_t lda, float* d, float* e, std::complex<float>* tauq,
-                  std::complex<float>* taup, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                  double* d, double* e, double* tauq, double* taup, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                  float* d, float* e, float* tauq, float* taup, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                  std::int64_t lda, double* d, double* e, std::complex<double>* tauq,
-                  std::complex<double>* taup, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                  float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                  double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                  std::int64_t lda, std::complex<float>* tau, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                  std::int64_t lda, std::complex<double>* tau, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                  std::int64_t lda, std::complex<float>* tau, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                  double* tau, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                  float* tau, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                  std::int64_t lda, std::complex<double>* tau, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                  std::int64_t lda, std::int64_t* ipiv, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
-                  std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
-                  std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                  std::int64_t lda, std::int64_t* ipiv, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                  const std::int64_t* ipiv, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
-                  const std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
-                  const std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                  const std::int64_t* ipiv, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                  std::int64_t nrhs, const std::complex<float>* a, std::int64_t lda,
-                  const std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                  std::int64_t nrhs, const double* a, std::int64_t lda, const std::int64_t* ipiv,
-                  double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                  std::int64_t nrhs, const float* a, std::int64_t lda, const std::int64_t* ipiv,
-                  float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                  std::int64_t nrhs, const std::complex<double>* a, std::int64_t lda,
-                  const std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                  std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u,
-                  std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                  std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u,
-                  std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                  std::int64_t m, std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                  float* s, std::complex<float>* u, std::int64_t ldu, std::complex<float>* vt,
-                  std::int64_t ldvt, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
-                  std::int64_t m, std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                  double* s, std::complex<double>* u, std::int64_t ldu, std::complex<double>* vt,
-                  std::int64_t ldvt, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, float* w,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, double* w,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
-                  std::complex<float>* b, std::int64_t ldb, float* w,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                  std::complex<double>* b, std::int64_t ldb, double* w,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, float* d, float* e,
-                  std::complex<float>* tau, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, double* d, double* e,
-                  std::complex<double>* tau, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                  std::int64_t k, float* a, std::int64_t lda, const float* tau, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                  std::int64_t k, double* a, std::int64_t lda, const double* tau,
-                  double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a,
-                  std::int64_t lda, const double* tau, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a,
-                  std::int64_t lda, const float* tau, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                  std::int64_t lda, const float* tau, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                  std::int64_t lda, const double* tau, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, const float* a,
-                  std::int64_t lda, const float* tau, float* c, std::int64_t ldc, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, const double* a,
-                  std::int64_t lda, const double* tau, double* c, std::int64_t ldc,
-                  double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
-                  const float* tau, float* c, std::int64_t ldc, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
-                  const double* tau, double* c, std::int64_t ldc, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda,
-                  const double* tau, double* c, std::int64_t ldc, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda,
-                  const float* tau, float* c, std::int64_t ldc, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                  std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                  std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                  std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                  std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                  const float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                  const double* a, std::int64_t lda, double* b, std::int64_t ldb,
-                  double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                  std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                  std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  double* a, std::int64_t lda, double* w, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
-                  float* a, std::int64_t lda, float* w, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b,
-                  std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b,
-                  std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                  std::int64_t lda, double* d, double* e, double* tau, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                  std::int64_t lda, float* d, float* e, float* tau, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                  std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                  std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                  const std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
-                  std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, const double* a,
-                  std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, const float* a,
-                  std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                  const std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
-                  std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                  std::int64_t k, std::complex<float>* a, std::int64_t lda,
-                  const std::complex<float>* tau, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,
-                  std::int64_t k, std::complex<double>* a, std::int64_t lda,
-                  const std::complex<double>* tau, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                  std::complex<float>* a, std::int64_t lda, const std::complex<float>* tau,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                  std::complex<double>* a, std::int64_t lda, const std::complex<double>* tau,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<float>* a, std::int64_t lda, const std::complex<float>* tau,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                  std::complex<double>* a, std::int64_t lda, const std::complex<double>* tau,
-                  std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const std::complex<float>* a,
-                  std::int64_t lda, const std::complex<float>* tau, std::complex<float>* c,
-                  std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const std::complex<double>* a,
-                  std::int64_t lda, const std::complex<double>* tau, std::complex<double>* c,
-                  std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const std::complex<float>* a,
-                  std::int64_t lda, const std::complex<float>* tau, std::complex<float>* c,
-                  std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
-                  std::int64_t m, std::int64_t n, std::int64_t k, const std::complex<double>* a,
-                  std::int64_t lda, const std::complex<double>* tau, std::complex<double>* c,
-                  std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies = {});
-sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                  const std::complex<float>* a, std::int64_t lda, const std::complex<float>* tau,
-                  std::complex<float>* c, std::int64_t ldc, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                  const std::complex<double>* a, std::int64_t lda, const std::complex<double>* tau,
-                  std::complex<double>* c, std::int64_t ldc, std::complex<double>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                        std::int64_t lda, std::int64_t stride_a, float* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                        std::int64_t lda, std::int64_t stride_a, double* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                        std::int64_t lda, std::int64_t stride_a, std::complex<float>* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                        std::int64_t lda, std::int64_t stride_a, std::complex<double>* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a,
-                        std::int64_t* lda, float** tau, std::int64_t group_count,
-                        std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a,
-                        std::int64_t* lda, double** tau, std::int64_t group_count,
-                        std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                        std::complex<float>** a, std::int64_t* lda, std::complex<float>** tau,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                        std::complex<double>** a, std::int64_t* lda, std::complex<double>** tau,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex<double>* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a,
-                        std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
-                        std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a,
-                        std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
-                        std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                        std::complex<float>** a, std::int64_t* lda, std::int64_t** ipiv,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                        std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda,
-                        std::int64_t stride_a, const std::int64_t* ipiv, std::int64_t stride_ipiv,
-                        std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda,
-                        std::int64_t stride_a, const std::int64_t* ipiv, std::int64_t stride_ipiv,
-                        std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<float>* a,
-                        std::int64_t lda, std::int64_t stride_a, const std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex<double>* a,
-                        std::int64_t lda, std::int64_t stride_a, const std::int64_t* ipiv,
-                        std::int64_t stride_ipiv, std::int64_t batch_size,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, std::int64_t group_count,
-                        std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, std::int64_t group_count,
-                        std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<float>** a,
-                        std::int64_t* lda, const std::int64_t* const* ipiv,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double>** a,
-                        std::int64_t* lda, const std::int64_t* const* ipiv,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                        std::int64_t nrhs, const float* a, std::int64_t lda, std::int64_t stride_a,
-                        const std::int64_t* ipiv, std::int64_t stride_ipiv, float* b,
-                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                        float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                        std::int64_t nrhs, const double* a, std::int64_t lda, std::int64_t stride_a,
-                        const std::int64_t* ipiv, std::int64_t stride_ipiv, double* b,
-                        std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                        double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                        std::int64_t nrhs, const std::complex<float>* a, std::int64_t lda,
-                        std::int64_t stride_a, const std::int64_t* ipiv, std::int64_t stride_ipiv,
-                        std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
-                        std::int64_t batch_size, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                        std::int64_t nrhs, const std::complex<double>* a, std::int64_t lda,
-                        std::int64_t stride_a, const std::int64_t* ipiv, std::int64_t stride_ipiv,
-                        std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
-                        std::int64_t batch_size, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-                        std::int64_t* nrhs, const float* const* a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, float** b, std::int64_t* ldb,
-                        std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-                        std::int64_t* nrhs, const double* const* a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, double** b, std::int64_t* ldb,
-                        std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-                        std::int64_t* nrhs, const std::complex<float>* const* a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, std::complex<float>** b, std::int64_t* ldb,
-                        std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-                        std::int64_t* nrhs, const std::complex<double>* const* a, std::int64_t* lda,
-                        const std::int64_t* const* ipiv, std::complex<double>** b,
-                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                        float* a, std::int64_t lda, std::int64_t stride_a, const float* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                        double* a, std::int64_t lda, std::int64_t stride_a, const double* tau,
-                        std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                        float** a, std::int64_t* lda, const float* const* tau,
-                        std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                        double** a, std::int64_t* lda, const double* const* tau,
-                        std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                        float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
-                        std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                        double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                        std::int64_t batch_size, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                        std::int64_t batch_size, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, float** a,
-                        std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes,
-                        float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, double** a,
-                        std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes,
-                        double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::complex<float>** a, std::int64_t* lda, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::complex<double>** a, std::int64_t* lda, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::int64_t nrhs, const float* a, std::int64_t lda, std::int64_t stride_a,
-                        float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                        float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::int64_t nrhs, const double* a, std::int64_t lda, std::int64_t stride_a,
-                        double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
-                        double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::int64_t nrhs, const std::complex<float>* a, std::int64_t lda,
-                        std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb,
-                        std::int64_t stride_b, std::int64_t batch_size,
-                        std::complex<float>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                        std::int64_t nrhs, const std::complex<double>* a, std::int64_t lda,
-                        std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb,
-                        std::int64_t stride_b, std::int64_t batch_size,
-                        std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::int64_t* nrhs, const float* const* a, std::int64_t* lda, float** b,
-                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
-                        float* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::int64_t* nrhs, const double* const* a, std::int64_t* lda, double** b,
-                        std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
-                        double* scratchpad, std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::int64_t* nrhs, const std::complex<float>* const* a, std::int64_t* lda,
-                        std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
-                        std::int64_t* nrhs, const std::complex<double>* const* a, std::int64_t* lda,
-                        std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                        std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
-                        const std::complex<float>* tau, std::int64_t stride_tau,
-                        std::int64_t batch_size, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
-                        std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
-                        const std::complex<double>* tau, std::int64_t stride_tau,
-                        std::int64_t batch_size, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                        std::complex<float>** a, std::int64_t* lda,
-                        const std::complex<float>* const* tau, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<float>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k,
-                        std::complex<double>** a, std::int64_t* lda,
-                        const std::complex<double>* const* tau, std::int64_t group_count,
-                        std::int64_t* group_sizes, std::complex<double>* scratchpad,
-                        std::int64_t scratchpad_size,
-                        const std::vector<sycl::event>& dependencies = {});
-
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda, std::int64_t ldu, std::int64_t ldvt);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t nrhs, std::int64_t lda, std::int64_t ldb);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>
-std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                                   std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-                                   std::int64_t ldb);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m,
-                                   std::int64_t n, std::int64_t k, std::int64_t lda);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                   std::int64_t lda);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
-                                   std::int64_t k, std::int64_t lda, std::int64_t ldc);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                   std::int64_t m, std::int64_t n, std::int64_t lda,
-                                   std::int64_t ldc);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_ipiv, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans,
-                                         std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-                                         std::int64_t stride_a, std::int64_t stride_ipiv,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t ldb, std::int64_t stride_b,
-                                         std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n,
-                                         std::int64_t k, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t stride_tau, std::int64_t batch_size);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans,
-                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* lda, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_real_floating_point<fp_type> = nullptr>
-std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* k, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                         std::int64_t* n, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-template <typename fp_type, oneapi::mkl::lapack::internal::is_floating_point<fp_type> = nullptr>
-std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo,
-                                         std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda,
-                                         std::int64_t* ldb, std::int64_t group_count,
-                                         std::int64_t* group_sizes);
-template <typename fp_type,
-          oneapi::mkl::lapack::internal::is_complex_floating_point<fp_type> = nullptr>
-std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n,
-                                         std::int64_t* k, std::int64_t* lda,
-                                         std::int64_t group_count, std::int64_t* group_sizes);
-
-} // namespace lapack
-} // namespace mkl
-} // namespace oneapi
diff --git a/src/lapack/backends/mklcpu/CMakeLists.txt b/src/lapack/backends/mklcpu/CMakeLists.txt
index fcc60a8e7..62a04f323 100644
--- a/src/lapack/backends/mklcpu/CMakeLists.txt
+++ b/src/lapack/backends/mklcpu/CMakeLists.txt
@@ -17,40 +17,43 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_lapack_mklcpu)
+set(LIB_NAME onemath_lapack_mklcpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   mkl_lapack.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: lapack_cpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_lapack ${LIB_NAME})
+add_dependencies(onemath_backend_libs_lapack ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::LAPACK)
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_SYCL::LAPACK)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_SYCL::LAPACK)
 else()
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_DPCPP)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_DPCPP)
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -63,8 +66,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/lapack/backends/mklcpu/lapack_cpu_wrappers.cpp b/src/lapack/backends/mklcpu/lapack_cpu_wrappers.cpp
index 4bd0713fa..abdfcda46 100644
--- a/src/lapack/backends/mklcpu/lapack_cpu_wrappers.cpp
+++ b/src/lapack/backends/mklcpu/lapack_cpu_wrappers.cpp
@@ -18,11 +18,11 @@
 *******************************************************************************/
 
 #include "lapack/function_table.hpp"
-#include "oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp"
+#include "oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT lapack_function_table_t mkl_lapack_table = {
+extern "C" ONEMATH_EXPORT lapack_function_table_t onemath_lapack_table = {
     WRAPPER_VERSION,
 #define LAPACK_BACKEND mklcpu
 #include "../mkl_common/lapack_wrappers.cxx"
diff --git a/src/lapack/backends/mklcpu/mkl_lapack.cpp b/src/lapack/backends/mklcpu/mkl_lapack.cpp
index cbd3aaa84..1745cc83e 100644
--- a/src/lapack/backends/mklcpu/mkl_lapack.cpp
+++ b/src/lapack/backends/mklcpu/mkl_lapack.cpp
@@ -23,13 +23,17 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/lapack/detail/mklcpu/onemkl_lapack_mklcpu.hpp"
-#include "../mkl_common/mkl_lapack_backend.hpp"
+// Intel(R) oneMKL header
+#include <mkl/lapack.hpp>
+
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/lapack/detail/mklcpu/onemath_lapack_mklcpu.hpp"
+
+#include "common_onemkl_conversion.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace mklcpu {
 
@@ -37,5 +41,5 @@ namespace mklcpu {
 
 } // namespace mklcpu
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/mklgpu/CMakeLists.txt b/src/lapack/backends/mklgpu/CMakeLists.txt
index e11592f82..2786e462c 100644
--- a/src/lapack/backends/mklgpu/CMakeLists.txt
+++ b/src/lapack/backends/mklgpu/CMakeLists.txt
@@ -17,40 +17,43 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_lapack_mklgpu)
+set(LIB_NAME onemath_lapack_mklgpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   mkl_lapack.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: lapack_gpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_lapack ${LIB_NAME})
+add_dependencies(onemath_backend_libs_lapack ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::LAPACK)
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_SYCL::LAPACK)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_SYCL::LAPACK)
 else()
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_DPCPP)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_DPCPP)
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -63,8 +66,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/lapack/backends/mklgpu/lapack_gpu_wrappers.cpp b/src/lapack/backends/mklgpu/lapack_gpu_wrappers.cpp
index cd3933274..dd2d39e42 100644
--- a/src/lapack/backends/mklgpu/lapack_gpu_wrappers.cpp
+++ b/src/lapack/backends/mklgpu/lapack_gpu_wrappers.cpp
@@ -18,11 +18,11 @@
 *******************************************************************************/
 
 #include "lapack/function_table.hpp"
-#include "oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp"
+#include "oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT lapack_function_table_t mkl_lapack_table = {
+extern "C" ONEMATH_EXPORT lapack_function_table_t onemath_lapack_table = {
     WRAPPER_VERSION,
 #define LAPACK_BACKEND mklgpu
 #include "../mkl_common/lapack_wrappers.cxx"
diff --git a/src/lapack/backends/mklgpu/mkl_lapack.cpp b/src/lapack/backends/mklgpu/mkl_lapack.cpp
index f7bc2a7e6..32ee6f781 100644
--- a/src/lapack/backends/mklgpu/mkl_lapack.cpp
+++ b/src/lapack/backends/mklgpu/mkl_lapack.cpp
@@ -23,13 +23,17 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
-#include "oneapi/mkl/lapack/types.hpp"
-#include "oneapi/mkl/lapack/detail/mklgpu/onemkl_lapack_mklgpu.hpp"
-#include "../mkl_common/mkl_lapack_backend.hpp"
+// Intel(R) oneMKL header
+#include <mkl/lapack.hpp>
+
+#include "oneapi/math/types.hpp"
+#include "oneapi/math/lapack/types.hpp"
+#include "oneapi/math/lapack/detail/mklgpu/onemath_lapack_mklgpu.hpp"
+
+#include "common_onemkl_conversion.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace mklgpu {
 
@@ -37,5 +41,5 @@ namespace mklgpu {
 
 } // namespace mklgpu
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/rocsolver/CMakeLists.txt b/src/lapack/backends/rocsolver/CMakeLists.txt
index c91089118..fe63454c1 100644
--- a/src/lapack/backends/rocsolver/CMakeLists.txt
+++ b/src/lapack/backends/rocsolver/CMakeLists.txt
@@ -19,7 +19,7 @@
 #
 #=========================================================================
 
-set(LIB_NAME onemkl_lapack_rocsolver)
+set(LIB_NAME onemath_lapack_rocsolver)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(hip REQUIRED)
 find_package(rocsolver REQUIRED)
@@ -27,22 +27,23 @@ find_package(Threads REQUIRED)
 
 set(SOURCES rocsolver_lapack.cpp
 		rocsolver_batch.cpp
-	        $<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:rocsolver_scope_handle.cpp>
+	        $<$<STREQUAL:${ONEMATH_SYCL_IMPLEMENTATION},dpc++>:rocsolver_scope_handle.cpp>
           $<$<BOOL:${BUILD_SHARED_LIBS}>: rocsolver_wrappers.cpp>)
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_lapack ${LIB_NAME})
+add_dependencies(onemath_backend_libs_lapack ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src/include
           ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 target_link_libraries(${LIB_OBJ} PRIVATE roc::rocsolver hip::host Threads::Threads)
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_17)
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON)
@@ -60,8 +61,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/lapack/backends/rocsolver/rocsolver_batch.cpp b/src/lapack/backends/rocsolver/rocsolver_batch.cpp
index 2965faf51..a84be1fe6 100644
--- a/src/lapack/backends/rocsolver/rocsolver_batch.cpp
+++ b/src/lapack/backends/rocsolver/rocsolver_batch.cpp
@@ -21,11 +21,11 @@
 #include "rocsolver_helper.hpp"
 #include "rocsolver_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -81,14 +81,14 @@ void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<d
                  sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "getri_batch");
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv, sycl::buffer<float>& b,
                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                  sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "getrs_batch");
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -96,7 +96,7 @@ void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "getrs_batch");
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -104,7 +104,7 @@ void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "getrs_batch");
 }
-void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                  std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -150,51 +150,52 @@ void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_
                  sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "orgqr_batch");
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
-                 std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
-                 sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
+                 sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
+                 std::int64_t batch_size, sycl::buffer<float>& scratchpad,
+                 std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrf_batch");
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrf_batch");
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrf_batch");
 }
-void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrf_batch");
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrs_batch");
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrs_batch");
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<float>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
                  std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "potrs_batch");
 }
-void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
+void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<double>>& b, std::int64_t ldb, std::int64_t stride_b,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
@@ -378,7 +379,7 @@ sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex<double
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getri_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size, float* scratchpad,
@@ -386,7 +387,7 @@ sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::i
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size, double* scratchpad,
@@ -394,7 +395,7 @@ sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::i
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -403,7 +404,7 @@ sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::i
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
@@ -412,21 +413,21 @@ sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::i
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv,
                         float** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv,
                         double** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<float>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::complex<float>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
@@ -434,7 +435,7 @@ sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "getrs_batch");
 }
-sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
+sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n,
                         std::int64_t* nrhs, std::complex<double>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::complex<double>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
@@ -468,26 +469,26 @@ sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, st
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "orgqr_batch");
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrf_batch");
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrf_batch");
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
                         std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrf_batch");
 }
-sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
                         std::int64_t scratchpad_size,
@@ -497,7 +498,7 @@ sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t
 
 template <typename Func, typename T>
 inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda,
+                               oneapi::math::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda,
                                std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad,
                                std::int64_t scratchpad_size,
                                const std::vector<sycl::event>& dependencies) {
@@ -520,7 +521,7 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
             cgh.depends_on(dependencies[i]);
         }
         cgh.depends_on(done_cpy);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -538,13 +539,13 @@ inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& qu
 }
 
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
-#define POTRF_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                          \
-    sycl::event potrf_batch(                                                                       \
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, TYPE** a, std::int64_t* lda, \
-        std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad,                     \
-        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {              \
-        return potrf_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda,          \
-                           group_count, group_sizes, scratchpad, scratchpad_size, dependencies);   \
+#define POTRF_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                         \
+    sycl::event potrf_batch(                                                                      \
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, TYPE** a,                  \
+        std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \
+        std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {             \
+        return potrf_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda,         \
+                           group_count, group_sizes, scratchpad, scratchpad_size, dependencies);  \
     }
 
 POTRF_BATCH_LAUNCHER_USM(float, rocsolver_spotrf_batched)
@@ -554,21 +555,21 @@ POTRF_BATCH_LAUNCHER_USM(std::complex<double>, rocsolver_zpotrf_batched)
 
 #undef POTRF_BATCH_LAUNCHER_USM
 
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a,
                         float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrs_batch");
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a,
                         double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrs_batch");
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size,
@@ -576,7 +577,7 @@ sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t
                         const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "potrs_batch");
 }
-sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<double>* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size,
@@ -587,7 +588,7 @@ sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t
 
 template <typename Func, typename T>
 inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue,
-                               oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a,
+                               oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a,
                                std::int64_t* lda, T** b, std::int64_t* ldb,
                                std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad,
                                std::int64_t scratchpad_size,
@@ -620,7 +621,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
         }
         cgh.depends_on(done_cpy_a);
         cgh.depends_on(done_cpy_b);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             int64_t offset = 0;
             rocblas_status err;
@@ -641,7 +642,7 @@ inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& qu
 // Scratchpad memory not needed as parts of buffer a is used as workspace memory
 #define POTRS_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                          \
     sycl::event potrs_batch(                                                                       \
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,          \
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,         \
         TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count,        \
         std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size,                 \
         const std::vector<sycl::event>& dependencies) {                                            \
@@ -747,7 +748,7 @@ std::int64_t getri_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
     throw unimplemented("lapack", "getri_batch_scratchpad_size");
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_ipiv,
                                                 std::int64_t ldb, std::int64_t stride_b,
@@ -755,7 +756,7 @@ std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl:
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::transpose trans,
+std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::transpose trans,
                                                  std::int64_t n, std::int64_t nrhs,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_ipiv, std::int64_t ldb,
@@ -764,14 +765,14 @@ std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+    sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
@@ -806,20 +807,20 @@ std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
 }
 
 template <>
-std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t batch_size) {
     throw unimplemented("lapack", "potrf_batch_scratchpad_size");
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                  std::int64_t n, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t batch_size) {
     throw unimplemented("lapack", "potrf_batch_scratchpad_size");
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue,
-                                                              oneapi::mkl::uplo uplo,
+                                                              oneapi::math::uplo uplo,
                                                               std::int64_t n, std::int64_t lda,
                                                               std::int64_t stride_a,
                                                               std::int64_t batch_size) {
@@ -827,21 +828,21 @@ std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(sycl::queue& queue
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(sycl::queue& queue,
-                                                               oneapi::mkl::uplo uplo,
+                                                               oneapi::math::uplo uplo,
                                                                std::int64_t n, std::int64_t lda,
                                                                std::int64_t stride_a,
                                                                std::int64_t batch_size) {
     throw unimplemented("lapack", "potrf_batch_scratchpad_size");
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t ldb,
                                                 std::int64_t stride_b, std::int64_t batch_size) {
     throw unimplemented("lapack", "potrs_batch_scratchpad_size");
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                  std::int64_t n, std::int64_t nrhs,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t ldb, std::int64_t stride_b,
@@ -850,14 +851,16 @@ std::int64_t potrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size) {
     throw unimplemented("lapack", "potrs_batch_scratchpad_size");
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda,
-    std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
+    sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b,
+    std::int64_t batch_size) {
     throw unimplemented("lapack", "potrs_batch_scratchpad_size");
 }
 template <>
@@ -941,7 +944,7 @@ std::int64_t getri_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
     throw unimplemented("lapack", "getri_batch_scratchpad_size");
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::transpose* trans,
+std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::math::transpose* trans,
                                                 std::int64_t* n, std::int64_t* nrhs,
                                                 std::int64_t* lda, std::int64_t* ldb,
                                                 std::int64_t group_count,
@@ -949,7 +952,7 @@ std::int64_t getrs_batch_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl:
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::transpose* trans,
+std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::math::transpose* trans,
                                                  std::int64_t* n, std::int64_t* nrhs,
                                                  std::int64_t* lda, std::int64_t* ldb,
                                                  std::int64_t group_count,
@@ -958,13 +961,13 @@ std::int64_t getrs_batch_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+    sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
     std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) {
     throw unimplemented("lapack", "getrs_batch_scratchpad_size");
 }
@@ -1012,12 +1015,12 @@ std::int64_t orgqr_batch_scratchpad_size<double>(sycl::queue& queue, std::int64_
 }
 
 // rocsolverDnXpotrfBatched does not use scratchpad memory
-#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE)                                                 \
-    template <>                                                                            \
-    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                        \
-        sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* lda, \
-        std::int64_t group_count, std::int64_t* group_sizes) {                             \
-        return 0;                                                                          \
+#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE)                                                  \
+    template <>                                                                             \
+    std::int64_t potrf_batch_scratchpad_size<TYPE>(                                         \
+        sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* lda, \
+        std::int64_t group_count, std::int64_t* group_sizes) {                              \
+        return 0;                                                                           \
     }
 
 POTRF_GROUP_LAUNCHER_SCRATCH(float)
@@ -1028,13 +1031,13 @@ POTRF_GROUP_LAUNCHER_SCRATCH(std::complex<double>)
 #undef POTRF_GROUP_LAUNCHER_SCRATCH
 
 // rocsolverDnXpotrsBatched does not use scratchpad memory
-#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                  \
-    template <>                                                                             \
-    std::int64_t potrs_batch_scratchpad_size<TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \
-        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                     \
-        std::int64_t* group_sizes) {                                                        \
-        return 0;                                                                           \
+#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE)                                                   \
+    template <>                                                                              \
+    std::int64_t potrs_batch_scratchpad_size<TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \
+        std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,                      \
+        std::int64_t* group_sizes) {                                                         \
+        return 0;                                                                            \
     }
 
 POTRS_GROUP_LAUNCHER_SCRATCH(float)
@@ -1063,5 +1066,5 @@ std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(sycl::queue& queu
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/rocsolver/rocsolver_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_handle.hpp
index fff7d591c..4828c0254 100644
--- a/src/lapack/backends/rocsolver/rocsolver_handle.hpp
+++ b/src/lapack/backends/rocsolver/rocsolver_handle.hpp
@@ -24,7 +24,7 @@
 #include <unordered_map>
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -57,7 +57,7 @@ struct rocsolver_handle {
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // ROCSOLVER_HANDLE_HPP
diff --git a/src/lapack/backends/rocsolver/rocsolver_helper.hpp b/src/lapack/backends/rocsolver/rocsolver_helper.hpp
index 694e4e08b..5d4e6e821 100644
--- a/src/lapack/backends/rocsolver/rocsolver_helper.hpp
+++ b/src/lapack/backends/rocsolver/rocsolver_helper.hpp
@@ -32,18 +32,18 @@
 #include <hip/hip_runtime.h>
 #include <complex>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "runtime_support_helper.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/exceptions.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
 // The static assert to make sure that all index types used in
-// oneMKL/include/oneapi/mkl/lapack.hpp interface are int64_t
+// oneMath/include/oneapi/math/lapack.hpp interface are int64_t
 template <typename... Next>
 struct is_int64 : std::false_type {};
 
@@ -75,7 +75,7 @@ struct Overflow<Index, T...> {
 
 template <typename Index, typename... Next>
 void overflow_check(Index index, Next... indices) {
-    static_assert(is_int64<Index, Next...>::value, "oneMKL index type must be 64 bit integer.");
+    static_assert(is_int64<Index, Next...>::value, "oneMath index type must be 64 bit integer.");
     Overflow<Index, Next...>::check(index, indices...);
 }
 
@@ -185,53 +185,53 @@ inline rocblas_eform get_rocsolver_itype(std::int64_t itype) {
     }
 }
 
-inline rocblas_evect get_rocsolver_job(oneapi::mkl::job jobz) {
+inline rocblas_evect get_rocsolver_job(oneapi::math::job jobz) {
     switch (jobz) {
-        case oneapi::mkl::job::V: return rocblas_evect_original;
-        case oneapi::mkl::job::N: return rocblas_evect_none;
+        case oneapi::math::job::V: return rocblas_evect_original;
+        case oneapi::math::job::N: return rocblas_evect_none;
         default: throw "Wrong jobz.";
     }
 }
 
-inline rocblas_svect get_rocsolver_jobsvd(oneapi::mkl::jobsvd job) {
+inline rocblas_svect get_rocsolver_jobsvd(oneapi::math::jobsvd job) {
     switch (job) {
-        case oneapi::mkl::jobsvd::N: return rocblas_svect_none;
-        case oneapi::mkl::jobsvd::A: return rocblas_svect_all;
-        case oneapi::mkl::jobsvd::O: return rocblas_svect_overwrite;
-        case oneapi::mkl::jobsvd::S: return rocblas_svect_singular;
+        case oneapi::math::jobsvd::N: return rocblas_svect_none;
+        case oneapi::math::jobsvd::A: return rocblas_svect_all;
+        case oneapi::math::jobsvd::O: return rocblas_svect_overwrite;
+        case oneapi::math::jobsvd::S: return rocblas_svect_singular;
         default: throw "Wrong jobsvd.";
     }
 }
 
-inline rocblas_operation get_rocblas_operation(oneapi::mkl::transpose trn) {
+inline rocblas_operation get_rocblas_operation(oneapi::math::transpose trn) {
     switch (trn) {
-        case oneapi::mkl::transpose::nontrans: return rocblas_operation_none;
-        case oneapi::mkl::transpose::trans: return rocblas_operation_transpose;
-        case oneapi::mkl::transpose::conjtrans: return rocblas_operation_conjugate_transpose;
+        case oneapi::math::transpose::nontrans: return rocblas_operation_none;
+        case oneapi::math::transpose::trans: return rocblas_operation_transpose;
+        case oneapi::math::transpose::conjtrans: return rocblas_operation_conjugate_transpose;
         default: throw "Wrong transpose Operation.";
     }
 }
 
-inline rocblas_fill get_rocblas_fill_mode(oneapi::mkl::uplo ul) {
+inline rocblas_fill get_rocblas_fill_mode(oneapi::math::uplo ul) {
     switch (ul) {
-        case oneapi::mkl::uplo::upper: return rocblas_fill_upper;
-        case oneapi::mkl::uplo::lower: return rocblas_fill_lower;
+        case oneapi::math::uplo::upper: return rocblas_fill_upper;
+        case oneapi::math::uplo::lower: return rocblas_fill_lower;
         default: throw "Wrong fill mode.";
     }
 }
 
-inline rocblas_side get_rocblas_side_mode(oneapi::mkl::side lr) {
+inline rocblas_side get_rocblas_side_mode(oneapi::math::side lr) {
     switch (lr) {
-        case oneapi::mkl::side::left: return rocblas_side_left;
-        case oneapi::mkl::side::right: return rocblas_side_right;
+        case oneapi::math::side::left: return rocblas_side_left;
+        case oneapi::math::side::right: return rocblas_side_right;
         default: throw "Wrong side mode.";
     }
 }
 
-inline rocblas_storev get_rocblas_generate(oneapi::mkl::generate qp) {
+inline rocblas_storev get_rocblas_generate(oneapi::math::generate qp) {
     switch (qp) {
-        case oneapi::mkl::generate::Q: return rocblas_column_wise;
-        case oneapi::mkl::generate::P: return rocblas_row_wise;
+        case oneapi::math::generate::Q: return rocblas_column_wise;
+        case oneapi::math::generate::P: return rocblas_row_wise;
         default: throw "Wrong generate.";
     }
 }
@@ -275,13 +275,13 @@ inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char*
     queue.wait();
     const int devinfo_ = get_rocsolver_devinfo(queue, devinfo);
     if (devinfo_ > 0)
-        throw oneapi::mkl::lapack::computation_error(
+        throw oneapi::math::lapack::computation_error(
             func_name, std::string(cufunc_name) + " failed with info = " + std::to_string(devinfo_),
             devinfo_);
 }
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif // _ROCSOLVER_HELPER_HPP_
diff --git a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp
index 99a02c22a..5b0c265b2 100644
--- a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp
+++ b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp
@@ -21,11 +21,11 @@
 #include "rocsolver_helper.hpp"
 #include "rocsolver_task.hpp"
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -46,7 +46,7 @@ inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto e_acc = e.template get_access<sycl::access::mode::write>(cgh);
         auto tauq_acc = tauq.template get_access<sycl::access::mode::write>(cgh);
         auto taup_acc = taup.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType_A*>(a_acc);
             auto d_ = sc.get_mem<rocmDataType_B*>(d_acc);
@@ -107,7 +107,7 @@ inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -150,7 +150,7 @@ void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m,
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto ipiv32_acc = ipiv32.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto ipiv32_ = sc.get_mem<int*>(ipiv32_acc);
@@ -211,7 +211,7 @@ void getri(sycl::queue& queue, std::int64_t n, sycl::buffer<std::complex<double>
 
 template <typename Func, typename T>
 inline void getrs(const char* func_name, Func func, sycl::queue& queue,
-                  oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+                  oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                   sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -236,7 +236,7 @@ inline void getrs(const char* func_name, Func func, sycl::queue& queue,
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto ipiv_acc = ipiv32.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto ipiv_ = sc.get_mem<std::int32_t*>(ipiv_acc);
@@ -249,7 +249,7 @@ inline void getrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GETRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,                  \
+    void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,                 \
                std::int64_t nrhs, sycl::buffer<TYPE>& a, std::int64_t lda,                        \
                sycl::buffer<std::int64_t>& ipiv, sycl::buffer<TYPE>& b, std::int64_t ldb,         \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                    \
@@ -265,8 +265,8 @@ GETRS_LAUNCHER(std::complex<double>, rocsolver_zgetrs)
 #undef GETRS_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<T_A>& a,
+inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<T_A>& a,
                   std::int64_t lda, sycl::buffer<T_B>& s, sycl::buffer<T_A>& u, std::int64_t ldu,
                   sycl::buffer<T_A>& vt, std::int64_t ldvt, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -281,7 +281,7 @@ inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto vt_acc = vt.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType_A*>(a_acc);
             auto s_ = sc.get_mem<rocmDataType_B*>(s_acc);
@@ -300,7 +300,7 @@ inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define GESVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                         \
-    void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,           \
+    void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,         \
                std::int64_t m, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda,         \
                sycl::buffer<TYPE_B>& s, sycl::buffer<TYPE_A>& u, std::int64_t ldu,                \
                sycl::buffer<TYPE_A>& vt, std::int64_t ldvt, sycl::buffer<TYPE_A>& scratchpad,     \
@@ -317,8 +317,8 @@ GESVD_LAUNCHER(std::complex<double>, double, rocsolver_zgesvd)
 #undef GESVD_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda,
+inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda,
                   sycl::buffer<T_B>& w, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
     using rocmDataType_A = typename RocmEquivalentType<T_A>::Type;
@@ -330,7 +330,7 @@ inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType_A*>(a_acc);
             auto w_ = sc.get_mem<rocmDataType_B*>(w_acc);
@@ -345,12 +345,12 @@ inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                         \
-    void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \
-               sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& w,                \
-               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                  \
-        heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \
-              scratchpad_size);                                                                   \
+#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                          \
+    void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,                \
+               std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& w, \
+               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                   \
+        heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad,  \
+              scratchpad_size);                                                                    \
     }
 
 HEEVD_LAUNCHER(std::complex<float>, float, rocsolver_cheevd)
@@ -360,7 +360,7 @@ HEEVD_LAUNCHER(std::complex<double>, double, rocsolver_zheevd)
 
 template <typename Func, typename T_A, typename T_B>
 inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                   sycl::buffer<T_A>& a, std::int64_t lda, sycl::buffer<T_A>& b, std::int64_t ldb,
                   sycl::buffer<T_B>& w, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -374,7 +374,7 @@ inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType_A*>(a_acc);
             auto b_ = sc.get_mem<rocmDataType_A*>(b_acc);
@@ -390,13 +390,13 @@ inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                         \
-    void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,                     \
-               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda, \
-               sycl::buffer<TYPE_A>& b, std::int64_t ldb, sycl::buffer<TYPE_B>& w,                \
-               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                  \
-        hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \
-              w, scratchpad, scratchpad_size);                                                    \
+#define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                          \
+    void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,                     \
+               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE_A>& a, std::int64_t lda, \
+               sycl::buffer<TYPE_A>& b, std::int64_t ldb, sycl::buffer<TYPE_B>& w,                 \
+               sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {                   \
+        hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb,  \
+              w, scratchpad, scratchpad_size);                                                     \
     }
 
 HEGVD_LAUNCHER(std::complex<float>, float, rocsolver_chegvd)
@@ -405,7 +405,7 @@ HEGVD_LAUNCHER(std::complex<double>, double, rocsolver_zhegvd)
 #undef HEGVD_LAUNCHER
 
 template <typename Func, typename T_A, typename T_B>
-inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T_A>& a, std::int64_t lda, sycl::buffer<T_B>& d,
                   sycl::buffer<T_B>& e, sycl::buffer<T_A>& tau, sycl::buffer<T_A>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -417,7 +417,7 @@ inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto d_acc = d.template get_access<sycl::access::mode::write>(cgh);
         auto e_acc = e.template get_access<sycl::access::mode::write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType_A*>(a_acc);
             auto d_ = sc.get_mem<rocmDataType_B*>(d_acc);
@@ -431,7 +431,7 @@ inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define HETRD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                               \
-    void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,              \
+    void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,             \
                sycl::buffer<TYPE_A>& a, std::int64_t lda, sycl::buffer<TYPE_B>& d,      \
                sycl::buffer<TYPE_B>& e, sycl::buffer<TYPE_A>& tau,                      \
                sycl::buffer<TYPE_A>& scratchpad, std::int64_t scratchpad_size) {        \
@@ -444,12 +444,12 @@ HETRD_LAUNCHER(std::complex<double>, double, rocsolver_zhetrd)
 
 #undef HETRD_LAUNCHER
 
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "hetrf");
 }
-void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
@@ -457,7 +457,7 @@ void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
 }
 
 template <typename Func, typename T>
-inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec,
+inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T>& a,
                   std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -466,7 +466,7 @@ inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -478,7 +478,7 @@ inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
-    void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,      \
+    void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,     \
                std::int64_t k, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,   \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
         orgbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \
@@ -499,7 +499,7 @@ inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -523,7 +523,7 @@ ORGQR_LAUNCHER(double, rocsolver_dorgqr)
 #undef ORGQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -531,7 +531,7 @@ inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -542,12 +542,12 @@ inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     });
 }
 
-#define ORGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,         \
-               std::int64_t scratchpad_size) {                                                    \
-        orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,     \
-              scratchpad_size);                                                                   \
+#define ORGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
+    void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,          \
+               std::int64_t scratchpad_size) {                                                     \
+        orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,      \
+              scratchpad_size);                                                                    \
     }
 
 ORGTR_LAUNCHER(float, rocsolver_sorgtr)
@@ -556,8 +556,8 @@ ORGTR_LAUNCHER(double, rocsolver_dorgtr)
 #undef ORGTR_LAUNCHER
 
 template <typename Func, typename T>
-inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& c, std::int64_t ldc, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -567,7 +567,7 @@ inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read_write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -581,8 +581,8 @@ inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,                \
-               oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,                      \
+    void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,              \
+               oneapi::math::transpose trans, std::int64_t m, std::int64_t n,                     \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,                  \
                sycl::buffer<TYPE>& c, std::int64_t ldc, sycl::buffer<TYPE>& scratchpad,           \
                std::int64_t scratchpad_size) {                                                    \
@@ -595,22 +595,22 @@ ORMTR_LAUNCHER(double, rocsolver_dormtr)
 
 #undef ORMTR_LAUNCHER
 
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& tau, sycl::buffer<float>& c, std::int64_t ldc,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "ormrq");
 }
-void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
-           sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
+void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
+           std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c, std::int64_t ldc,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "ormrq");
 }
 
 template <typename Func, typename T>
-inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& c,
                   std::int64_t ldc, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -619,7 +619,7 @@ inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::read>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -633,7 +633,7 @@ inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define ORMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
-    void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,           \
+    void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,         \
                std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<TYPE>& a,              \
                std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& c, std::int64_t ldc, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
@@ -647,7 +647,7 @@ ORMQR_LAUNCHER(double, rocsolver_dormqr)
 #undef ORMQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -656,7 +656,7 @@ inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto devInfo_ = sc.get_mem<int*>(devInfo_acc);
@@ -668,11 +668,11 @@ inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define POTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {  \
-        potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,          \
-              scratchpad_size);                                                                   \
+#define POTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
+    void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {   \
+        potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,           \
+              scratchpad_size);                                                                    \
     }
 
 POTRF_LAUNCHER(float, rocsolver_spotrf)
@@ -683,7 +683,7 @@ POTRF_LAUNCHER(std::complex<double>, rocsolver_zpotrf)
 #undef POTRF_LAUNCHER
 
 template <typename Func, typename T>
-inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -692,7 +692,7 @@ inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto devInfo_ = sc.get_mem<int*>(devInfo_acc);
@@ -704,11 +704,11 @@ inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::
     lapack_info_check(queue, devInfo, __func__, func_name);
 }
 
-#define POTRI_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {  \
-        potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,          \
-              scratchpad_size);                                                                   \
+#define POTRI_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
+    void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {   \
+        potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad,           \
+              scratchpad_size);                                                                    \
     }
 
 POTRI_LAUNCHER(float, rocsolver_spotri)
@@ -719,7 +719,7 @@ POTRI_LAUNCHER(std::complex<double>, rocsolver_zpotri)
 #undef POTRI_LAUNCHER
 
 template <typename Func, typename T>
-inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -728,7 +728,7 @@ inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read>(cgh);
         auto b_acc = b.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto b_ = sc.get_mem<rocmDataType*>(b_acc);
@@ -740,7 +740,7 @@ inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define POTRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                  \
-    void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,    \
+    void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs,   \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& b, std::int64_t ldb, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                   \
         potrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb,       \
@@ -755,8 +755,8 @@ POTRS_LAUNCHER(std::complex<double>, rocsolver_zpotrs)
 #undef POTRS_LAUNCHER
 
 template <typename Func, typename T>
-inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
+inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<T>& w, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
     overflow_check(n, lda, scratchpad_size);
@@ -766,7 +766,7 @@ inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto w_ = sc.get_mem<rocmDataType*>(w_acc);
@@ -782,8 +782,8 @@ inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define SYEVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \
-               sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& w,                    \
+    void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,               \
+               std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& w,    \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                    \
         syevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \
               scratchpad_size);                                                                   \
@@ -796,9 +796,9 @@ SYEVD_LAUNCHER(double, rocsolver_dsyevd)
 
 template <typename Func, typename T>
 inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<T>& a,
-                  std::int64_t lda, sycl::buffer<T>& b, std::int64_t ldb, sycl::buffer<T>& w,
-                  sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
+                  sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& b, std::int64_t ldb,
+                  sycl::buffer<T>& w, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
     overflow_check(n, lda, ldb, scratchpad_size);
     sycl::buffer<int> devInfo{ 1 };
@@ -808,7 +808,7 @@ inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int
         auto w_acc = w.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
         auto scratch_acc = scratchpad.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto b_ = sc.get_mem<rocmDataType*>(b_acc);
@@ -825,8 +825,8 @@ inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int
 }
 
 #define SYGVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,                     \
-               oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda,   \
+    void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,                    \
+               oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, std::int64_t lda,  \
                sycl::buffer<TYPE>& b, std::int64_t ldb, sycl::buffer<TYPE>& w,                    \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                    \
         sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \
@@ -839,7 +839,7 @@ SYGVD_LAUNCHER(double, rocsolver_dsygvd)
 #undef SYGVD_LAUNCH
 
 template <typename Func, typename T>
-inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& d,
                   sycl::buffer<T>& e, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -850,7 +850,7 @@ inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto d_acc = d.template get_access<sycl::access::mode::write>(cgh);
         auto e_acc = e.template get_access<sycl::access::mode::write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto d_ = sc.get_mem<rocmDataType*>(d_acc);
@@ -863,13 +863,13 @@ inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::
     });
 }
 
-#define SYTRD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& d, sycl::buffer<TYPE>& e,                    \
-               sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,                           \
-               std::int64_t scratchpad_size) {                                                    \
-        sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau,           \
-              scratchpad, scratchpad_size);                                                       \
+#define SYTRD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
+    void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& d, sycl::buffer<TYPE>& e,                     \
+               sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,                            \
+               std::int64_t scratchpad_size) {                                                     \
+        sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau,            \
+              scratchpad, scratchpad_size);                                                        \
     }
 
 SYTRD_LAUNCHER(float, rocsolver_ssytrd)
@@ -878,7 +878,7 @@ SYTRD_LAUNCHER(double, rocsolver_dsytrd)
 #undef SYTRD_LAUNCHER
 
 template <typename Func, typename T>
-inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda,
                   sycl::buffer<std::int64_t>& ipiv, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -896,7 +896,7 @@ inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto ipiv32_acc = ipiv32.template get_access<sycl::access::mode::write>(cgh);
         auto devInfo_acc = devInfo.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto ipiv32_ = sc.get_mem<int*>(ipiv32_acc);
@@ -920,7 +920,7 @@ inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define SYTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
-    void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a,  \
+    void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
                std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<TYPE>& scratchpad, \
                std::int64_t scratchpad_size) {                                                     \
         sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad,     \
@@ -934,27 +934,27 @@ SYTRF_LAUNCHER(std::complex<double>, rocsolver_zsytrf)
 
 #undef SYTRF_LAUNCHER
 
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "trtrs");
 }
-void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-           oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+           oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
@@ -962,7 +962,7 @@ void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose tr
 }
 
 template <typename Func, typename T>
-inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec,
+inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<T>& a,
                   std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -971,7 +971,7 @@ inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -983,7 +983,7 @@ inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
-    void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n,      \
+    void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n,     \
                std::int64_t k, sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,   \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
         ungbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \
@@ -1004,7 +1004,7 @@ inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -1028,7 +1028,7 @@ UNGQR_LAUNCHER(std::complex<double>, rocsolver_zungqr)
 #undef UNGQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo,
+inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1036,7 +1036,7 @@ inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     queue.submit([&](sycl::handler& cgh) {
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -1047,12 +1047,12 @@ inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
     });
 }
 
-#define UNGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
-               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,         \
-               std::int64_t scratchpad_size) {                                                    \
-        ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,     \
-              scratchpad_size);                                                                   \
+#define UNGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
+    void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<TYPE>& a, \
+               std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& scratchpad,          \
+               std::int64_t scratchpad_size) {                                                     \
+        ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad,      \
+              scratchpad_size);                                                                    \
     }
 
 UNGTR_LAUNCHER(std::complex<float>, rocsolver_cungtr)
@@ -1060,24 +1060,24 @@ UNGTR_LAUNCHER(std::complex<double>, rocsolver_zungtr)
 
 #undef UNGTR_LAUNCHER
 
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
-           std::int64_t scratchpad_size) {
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
+           sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "unmrq");
 }
-void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
-           std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
-           sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
-           std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
-           std::int64_t scratchpad_size) {
+void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
+           std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
+           std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
+           sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
+           sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     throw unimplemented("lapack", "unmrq");
 }
 
 template <typename Func, typename T>
-inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau, sycl::buffer<T>& c,
                   std::int64_t ldc, sycl::buffer<T>& scratchpad, std::int64_t scratchpad_size) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1086,7 +1086,7 @@ inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -1100,7 +1100,7 @@ inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                    \
-    void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,           \
+    void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,         \
                std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<TYPE>& a,              \
                std::int64_t lda, sycl::buffer<TYPE>& tau, sycl::buffer<TYPE>& c, std::int64_t ldc, \
                sycl::buffer<TYPE>& scratchpad, std::int64_t scratchpad_size) {                     \
@@ -1114,8 +1114,8 @@ UNMQR_LAUNCHER(std::complex<double>, rocsolver_zunmqr)
 #undef UNMQR_LAUNCHER
 
 template <typename Func, typename T>
-inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, sycl::buffer<T>& a, std::int64_t lda, sycl::buffer<T>& tau,
                   sycl::buffer<T>& c, std::int64_t ldc, sycl::buffer<T>& scratchpad,
                   std::int64_t scratchpad_size) {
@@ -1125,7 +1125,7 @@ inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
         auto a_acc = a.template get_access<sycl::access::mode::read_write>(cgh);
         auto tau_acc = tau.template get_access<sycl::access::mode::write>(cgh);
         auto c_acc = c.template get_access<sycl::access::mode::read_write>(cgh);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = sc.get_mem<rocmDataType*>(a_acc);
             auto tau_ = sc.get_mem<rocmDataType*>(tau_acc);
@@ -1139,8 +1139,8 @@ inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::
 }
 
 #define UNMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE)                                                   \
-    void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,                \
-               oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,                      \
+    void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,              \
+               oneapi::math::transpose trans, std::int64_t m, std::int64_t n,                     \
                sycl::buffer<TYPE>& a, std::int64_t lda, sycl::buffer<TYPE>& tau,                  \
                sycl::buffer<TYPE>& c, std::int64_t ldc, sycl::buffer<TYPE>& scratchpad,           \
                std::int64_t scratchpad_size) {                                                    \
@@ -1169,7 +1169,7 @@ inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType_A*>(a);
             auto d_ = reinterpret_cast<rocmDataType_B*>(d);
@@ -1233,7 +1233,7 @@ inline sycl::event geqrf(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1279,7 +1279,7 @@ inline sycl::event getrf(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto devInfo_ = reinterpret_cast<int*>(devInfo);
@@ -1343,7 +1343,7 @@ sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex<double>* a, s
 
 template <typename Func, typename T>
 inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T* a,
+                         oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T* a,
                          std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1368,7 +1368,7 @@ inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
             cgh.depends_on(dependencies[i]);
         }
         cgh.depends_on(done_casting);
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto ipiv_ = reinterpret_cast<int*>(ipiv32);
@@ -1387,7 +1387,7 @@ inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GETRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                              \
-    sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,          \
+    sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,         \
                       std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \
                       std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size,          \
                       const std::vector<sycl::event>& dependencies) {                            \
@@ -1404,7 +1404,7 @@ GETRS_LAUNCHER_USM(std::complex<double>, rocsolver_zgetrs)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                         oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m,
                          std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu,
                          T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1417,7 +1417,7 @@ inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType_A*>(a);
             auto s_ = reinterpret_cast<rocmDataType_B*>(s);
@@ -1438,7 +1438,7 @@ inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                    \
-    sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,   \
+    sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \
                       std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s,    \
                       TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt,                \
                       TYPE_A* scratchpad, std::int64_t scratchpad_size,                          \
@@ -1456,7 +1456,7 @@ GESVD_LAUNCHER_USM(std::complex<double>, double, rocsolver_zgesvd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a,
                          std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType_A = typename RocmEquivalentType<T_A>::Type;
@@ -1468,7 +1468,7 @@ inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType_A*>(a);
             auto w_ = reinterpret_cast<rocmDataType_B*>(w);
@@ -1486,7 +1486,7 @@ inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                     \
-    sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,          \
+    sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,        \
                       std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \
                       std::int64_t scratchpad_size,                                               \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1501,7 +1501,7 @@ HEEVD_LAUNCHER_USM(std::complex<double>, double, rocsolver_zheevd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a,
                          std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1514,7 +1514,7 @@ inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType_A*>(a);
             auto b_ = reinterpret_cast<rocmDataType_A*>(b);
@@ -1533,8 +1533,8 @@ inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, s
 }
 
 #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                    \
-    sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,             \
-                      oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda,       \
+    sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,            \
+                      oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda,      \
                       TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad,                \
                       std::int64_t scratchpad_size,                                              \
                       const std::vector<sycl::event>& dependencies) {                            \
@@ -1549,7 +1549,7 @@ HEGVD_LAUNCHER_USM(std::complex<double>, double, rocsolver_zhegvd)
 
 template <typename Func, typename T_A, typename T_B>
 inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d,
+                         oneapi::math::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d,
                          T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType_A = typename RocmEquivalentType<T_A>::Type;
@@ -1560,7 +1560,7 @@ inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType_A*>(a);
             auto d_ = reinterpret_cast<rocmDataType_B*>(d);
@@ -1575,7 +1575,7 @@ inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE)                                  \
-    sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a,   \
+    sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a,  \
                       std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \
                       std::int64_t scratchpad_size,                                            \
                       const std::vector<sycl::event>& dependencies) {                          \
@@ -1588,13 +1588,13 @@ HETRD_LAUNCHER_USM(std::complex<double>, double, rocsolver_zhetrd)
 
 #undef HETRD_LAUNCHER_USM
 
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "hetrf");
 }
-sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -1603,7 +1603,7 @@ sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
 
 template <typename Func, typename T>
 inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          T* a, std::int64_t lda, T* tau, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1614,7 +1614,7 @@ inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1627,7 +1627,7 @@ inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                           \
-    sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,          \
+    sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,         \
                       std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau,   \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                         \
                       const std::vector<sycl::event>& dependencies) {                         \
@@ -1652,7 +1652,7 @@ inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1678,7 +1678,7 @@ ORGQR_LAUNCHER_USM(double, rocsolver_dorgqr)
 
 template <typename Func, typename T>
 inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1688,7 +1688,7 @@ inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1701,7 +1701,7 @@ inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                                \
-    sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,         \
+    sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,        \
                       std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \
                       const std::vector<sycl::event>& dependencies) {                              \
         return orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau,           \
@@ -1715,8 +1715,8 @@ ORGTR_LAUNCHER_USM(double, rocsolver_dorgtr)
 
 template <typename Func, typename T>
 inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a,
                          std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1727,7 +1727,7 @@ inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1742,8 +1742,8 @@ inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define ORMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                             \
-    sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,       \
-                      oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,    \
+    sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,     \
+                      oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,   \
                       std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \
                       std::int64_t scratchpad_size,                                             \
                       const std::vector<sycl::event>& dependencies) {                           \
@@ -1756,13 +1756,13 @@ ORMTR_LAUNCHER_USM(double, rocsolver_dormtr)
 
 #undef ORMTR_LAUNCHER_USM
 
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                   float* tau, float* c, std::int64_t ldc, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "ormrq");
 }
-sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                   double* tau, double* c, std::int64_t ldc, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1771,7 +1771,7 @@ sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c,
                          std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1782,7 +1782,7 @@ inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -1796,14 +1796,14 @@ inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue,
     return done;
 }
 
-#define ORMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                              \
-    sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,  \
-                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \
-                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                    \
-                      std::int64_t scratchpad_size,                                              \
-                      const std::vector<sycl::event>& dependencies) {                            \
-        return ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \
-                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                    \
+#define ORMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                               \
+    sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \
+                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda,  \
+                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                     \
+                      std::int64_t scratchpad_size,                                               \
+                      const std::vector<sycl::event>& dependencies) {                             \
+        return ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,  \
+                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                     \
     }
 
 ORMQR_LAUNCHER_USM(float, rocsolver_sormqr)
@@ -1813,7 +1813,7 @@ ORMQR_LAUNCHER_USM(double, rocsolver_dormqr)
 
 template <typename Func, typename T>
 inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1824,7 +1824,7 @@ inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto devInfo_ = reinterpret_cast<int*>(devInfo);
@@ -1839,7 +1839,7 @@ inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                             \
-    sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,      \
+    sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,     \
                       std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size,         \
                       const std::vector<sycl::event>& dependencies) {                           \
         return potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \
@@ -1855,7 +1855,7 @@ POTRF_LAUNCHER_USM(std::complex<double>, rocsolver_zpotrf)
 
 template <typename Func, typename T>
 inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1866,7 +1866,7 @@ inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto scratch_ = reinterpret_cast<rocmDataType*>(scratchpad);
@@ -1882,7 +1882,7 @@ inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRI_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                             \
-    sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,      \
+    sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,     \
                       std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size,         \
                       const std::vector<sycl::event>& dependencies) {                           \
         return potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \
@@ -1898,7 +1898,7 @@ POTRI_LAUNCHER_USM(std::complex<double>, rocsolver_zpotri)
 
 template <typename Func, typename T>
 inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
+                         oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a,
                          std::int64_t lda, T* b, std::int64_t ldb, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1909,7 +1909,7 @@ inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto b_ = reinterpret_cast<rocmDataType*>(b);
@@ -1922,7 +1922,7 @@ inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define POTRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                               \
-    sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,                 \
+    sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,                \
                       std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb,    \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                             \
                       const std::vector<sycl::event>& dependencies) {                             \
@@ -1939,7 +1939,7 @@ POTRS_LAUNCHER_USM(std::complex<double>, rocsolver_zpotrs)
 
 template <typename Func, typename T>
 inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a,
                          std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -1950,7 +1950,7 @@ inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto w_ = reinterpret_cast<rocmDataType*>(w);
@@ -1968,7 +1968,7 @@ inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define SYEVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                          \
-    sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,     \
+    sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,   \
                       std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad,  \
                       std::int64_t scratchpad_size,                                          \
                       const std::vector<sycl::event>& dependencies) {                        \
@@ -1983,7 +1983,7 @@ SYEVD_LAUNCHER_USM(double, rocsolver_dsyevd)
 
 template <typename Func, typename T>
 inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype,
-                         oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a,
+                         oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a,
                          std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -1995,7 +1995,7 @@ inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto b_ = reinterpret_cast<rocmDataType*>(b);
@@ -2013,13 +2013,13 @@ inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, s
     return done;
 }
 
-#define SYGVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                               \
-    sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,              \
-                      oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \
-                      std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size,  \
-                      const std::vector<sycl::event>& dependencies) {                             \
-        return sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda,  \
-                     b, ldb, w, scratchpad, scratchpad_size, dependencies);                       \
+#define SYGVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                                \
+    sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,              \
+                      oneapi::math::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \
+                      std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size,   \
+                      const std::vector<sycl::event>& dependencies) {                              \
+        return sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda,   \
+                     b, ldb, w, scratchpad, scratchpad_size, dependencies);                        \
     }
 
 SYGVD_LAUNCHER_USM(float, rocsolver_ssygvd)
@@ -2029,8 +2029,8 @@ SYGVD_LAUNCHER_USM(double, rocsolver_dsygvd)
 
 template <typename Func, typename T>
 inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, T* e,
-                         T* tau, T* scratchpad, std::int64_t scratchpad_size,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d,
+                         T* e, T* tau, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
     overflow_check(n, lda, scratchpad_size);
@@ -2039,7 +2039,7 @@ inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto d_ = reinterpret_cast<rocmDataType*>(d);
@@ -2054,7 +2054,7 @@ inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define SYTRD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                            \
-    sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,     \
+    sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,    \
                       std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad,         \
                       std::int64_t scratchpad_size,                                            \
                       const std::vector<sycl::event>& dependencies) {                          \
@@ -2069,7 +2069,7 @@ SYTRD_LAUNCHER_USM(double, rocsolver_dsytrd)
 
 template <typename Func, typename T>
 inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda,
                          std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -2087,7 +2087,7 @@ inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto ipiv_ = reinterpret_cast<int*>(ipiv32);
@@ -2112,13 +2112,13 @@ inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue,
     return done_casting;
 }
 
-#define SYTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                        \
-    sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \
-                      std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad,              \
-                      std::int64_t scratchpad_size,                                        \
-                      const std::vector<sycl::event>& dependencies) {                      \
-        return sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv,  \
-                     scratchpad, scratchpad_size, dependencies);                           \
+#define SYTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                         \
+    sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \
+                      std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad,               \
+                      std::int64_t scratchpad_size,                                         \
+                      const std::vector<sycl::event>& dependencies) {                       \
+        return sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv,   \
+                     scratchpad, scratchpad_size, dependencies);                            \
     }
 
 SYTRF_LAUNCHER_USM(float, rocsolver_ssytrf)
@@ -2128,27 +2128,27 @@ SYTRF_LAUNCHER_USM(std::complex<double>, rocsolver_zsytrf)
 
 #undef SYTRF_LAUNCHER_USM
 
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
-                  std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
-                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+                  std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
+                  std::int64_t ldb, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a,
                   std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a,
                   std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "trtrs");
 }
-sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                  oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                  oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                   std::int64_t ldb, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -2157,7 +2157,7 @@ sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
+                         oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k,
                          T* a, std::int64_t lda, T* tau, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2168,7 +2168,7 @@ inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -2181,7 +2181,7 @@ inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                           \
-    sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,          \
+    sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,         \
                       std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau,   \
                       TYPE* scratchpad, std::int64_t scratchpad_size,                         \
                       const std::vector<sycl::event>& dependencies) {                         \
@@ -2206,7 +2206,7 @@ inline sycl::event ungqr(const char* func_name, Func func, sycl::queue& queue, s
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -2232,7 +2232,7 @@ UNGQR_LAUNCHER_USM(std::complex<double>, rocsolver_zungqr)
 
 template <typename Func, typename T>
 inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
+                         oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau,
                          T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
     using rocmDataType = typename RocmEquivalentType<T>::Type;
@@ -2242,7 +2242,7 @@ inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -2255,7 +2255,7 @@ inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                                \
-    sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a,         \
+    sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a,        \
                       std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \
                       const std::vector<sycl::event>& dependencies) {                              \
         return ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau,           \
@@ -2267,14 +2267,14 @@ UNGTR_LAUNCHER_USM(std::complex<double>, rocsolver_zungtr)
 
 #undef UNGTR_LAUNCHER_USM
 
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* c,
                   std::int64_t ldc, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     throw unimplemented("lapack", "unmrq");
 }
-sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* c,
                   std::int64_t ldc, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -2284,7 +2284,7 @@ sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::trans
 
 template <typename Func, typename T>
 inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m,
+                         oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
                          std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c,
                          std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2295,7 +2295,7 @@ inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -2309,14 +2309,14 @@ inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue,
     return done;
 }
 
-#define UNMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                              \
-    sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,  \
-                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \
-                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                    \
-                      std::int64_t scratchpad_size,                                              \
-                      const std::vector<sycl::event>& dependencies) {                            \
-        return unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \
-                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                    \
+#define UNMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                               \
+    sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \
+                      std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda,  \
+                      TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad,                     \
+                      std::int64_t scratchpad_size,                                               \
+                      const std::vector<sycl::event>& dependencies) {                             \
+        return unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda,  \
+                     tau, c, ldc, scratchpad, scratchpad_size, dependencies);                     \
     }
 
 UNMQR_LAUNCHER_USM(std::complex<float>, rocsolver_cunmqr)
@@ -2326,8 +2326,8 @@ UNMQR_LAUNCHER_USM(std::complex<double>, rocsolver_zunmqr)
 
 template <typename Func, typename T>
 inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
-                         oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                         oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a,
+                         oneapi::math::side side, oneapi::math::uplo uplo,
+                         oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a,
                          std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad,
                          std::int64_t scratchpad_size,
                          const std::vector<sycl::event>& dependencies) {
@@ -2338,7 +2338,7 @@ inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
         for (int64_t i = 0; i < num_events; i++) {
             cgh.depends_on(dependencies[i]);
         }
-        onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
+        onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) {
             auto handle = sc.get_handle(queue);
             auto a_ = reinterpret_cast<rocmDataType*>(a);
             auto tau_ = reinterpret_cast<rocmDataType*>(tau);
@@ -2353,8 +2353,8 @@ inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue,
 }
 
 #define UNMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE)                                             \
-    sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,       \
-                      oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,    \
+    sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,     \
+                      oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a,   \
                       std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \
                       std::int64_t scratchpad_size,                                             \
                       const std::vector<sycl::event>& dependencies) {                           \
@@ -2418,12 +2418,12 @@ GEQRF_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef GEQRF_LAUNCHER_SCRATCH
 
-#define GESVD_LAUNCHER_SCRATCH(TYPE)                                                              \
-    template <>                                                                                   \
-    std::int64_t gesvd_scratchpad_size<TYPE>(                                                     \
-        sycl::queue & queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, \
-        std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) {                  \
-        return std::min(m, n) - 1;                                                                \
+#define GESVD_LAUNCHER_SCRATCH(TYPE)                                                             \
+    template <>                                                                                  \
+    std::int64_t gesvd_scratchpad_size<TYPE>(                                                    \
+        sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,              \
+        std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \
+        return std::min(m, n) - 1;                                                               \
     }
 
 GESVD_LAUNCHER_SCRATCH(float)
@@ -2468,7 +2468,7 @@ std::int64_t getri_scratchpad_size<std::complex<double>>(sycl::queue& queue, std
 
 #define GETRS_LAUNCHER_SCRATCH(TYPE)                                                              \
     template <>                                                                                   \
-    std::int64_t getrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::transpose trans,   \
+    std::int64_t getrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::transpose trans,  \
                                              std::int64_t n, std::int64_t nrhs, std::int64_t lda, \
                                              std::int64_t ldb) {                                  \
         return 0;                                                                                 \
@@ -2481,12 +2481,12 @@ GETRS_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef GETRS_LAUNCHER_SCRATCH
 
-#define HEEVD_LAUNCHER_SCRATCH(TYPE)                                                     \
-    template <>                                                                          \
-    std::int64_t heevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::job jobz, \
-                                             oneapi::mkl::uplo uplo, std::int64_t n,     \
-                                             std::int64_t lda) {                         \
-        return n;                                                                        \
+#define HEEVD_LAUNCHER_SCRATCH(TYPE)                                                      \
+    template <>                                                                           \
+    std::int64_t heevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::job jobz, \
+                                             oneapi::math::uplo uplo, std::int64_t n,     \
+                                             std::int64_t lda) {                          \
+        return n;                                                                         \
     }
 
 HEEVD_LAUNCHER_SCRATCH(std::complex<float>)
@@ -2497,7 +2497,7 @@ HEEVD_LAUNCHER_SCRATCH(std::complex<double>)
 #define HEGVD_LAUNCHER_SCRATCH(TYPE)                                                               \
     template <>                                                                                    \
     std::int64_t hegvd_scratchpad_size<TYPE>(sycl::queue & queue, std::int64_t itype,              \
-                                             oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,        \
+                                             oneapi::math::job jobz, oneapi::math::uplo uplo,      \
                                              std::int64_t n, std::int64_t lda, std::int64_t ldb) { \
         return n;                                                                                  \
     }
@@ -2507,11 +2507,11 @@ HEGVD_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef HEGVD_LAUNCHER_SCRATCH
 
-#define HETRD_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t hetrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define HETRD_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t hetrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 HETRD_LAUNCHER_SCRATCH(std::complex<float>)
@@ -2520,22 +2520,23 @@ HETRD_LAUNCHER_SCRATCH(std::complex<double>)
 #undef HETRD_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrf_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     throw unimplemented("lapack", "hetrf_scratchpad_size");
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t hetrf_scratchpad_size<std::complex<double>>(sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     throw unimplemented("lapack", "hetrf_scratchpad_size");
 }
 
-#define ORGBR_LAUNCHER_SCRATCH(TYPE)                                                         \
-    template <>                                                                              \
-    std::int64_t orgbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::generate vec, \
-                                             std::int64_t m, std::int64_t n, std::int64_t k, \
-                                             std::int64_t lda) {                             \
-        return 0;                                                                            \
+#define ORGBR_LAUNCHER_SCRATCH(TYPE)                                                          \
+    template <>                                                                               \
+    std::int64_t orgbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::generate vec, \
+                                             std::int64_t m, std::int64_t n, std::int64_t k,  \
+                                             std::int64_t lda) {                              \
+        return 0;                                                                             \
     }
 
 ORGBR_LAUNCHER_SCRATCH(float)
@@ -2543,11 +2544,11 @@ ORGBR_LAUNCHER_SCRATCH(double)
 
 #undef ORGBR_LAUNCHER_SCRATCH
 
-#define ORGTR_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t orgtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define ORGTR_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t orgtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 ORGTR_LAUNCHER_SCRATCH(float)
@@ -2568,26 +2569,26 @@ ORGQR_LAUNCHER_SCRATCH(double)
 #undef ORGQR_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::mkl::side side,
-                                          oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<float>(sycl::queue& queue, oneapi::math::side side,
+                                          oneapi::math::transpose trans, std::int64_t m,
                                           std::int64_t n, std::int64_t k, std::int64_t lda,
                                           std::int64_t ldc) {
     throw unimplemented("lapack", "ormrq_scratchpad_size");
 }
 template <>
-std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::mkl::side side,
-                                           oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormrq_scratchpad_size<double>(sycl::queue& queue, oneapi::math::side side,
+                                           oneapi::math::transpose trans, std::int64_t m,
                                            std::int64_t n, std::int64_t k, std::int64_t lda,
                                            std::int64_t ldc) {
     throw unimplemented("lapack", "ormrq_scratchpad_size");
 }
 
-#define ORMQRF_LAUNCHER_SCRATCH(TYPE)                                                              \
-    template <>                                                                                    \
-    std::int64_t ormqr_scratchpad_size<TYPE>(                                                      \
-        sycl::queue & queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, \
-        std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {                      \
-        return 0;                                                                                  \
+#define ORMQRF_LAUNCHER_SCRATCH(TYPE)                                                         \
+    template <>                                                                               \
+    std::int64_t ormqr_scratchpad_size<TYPE>(                                                 \
+        sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans,          \
+        std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \
+        return 0;                                                                             \
     }
 
 ORMQRF_LAUNCHER_SCRATCH(float)
@@ -2597,10 +2598,10 @@ ORMQRF_LAUNCHER_SCRATCH(double)
 
 #define ORMTR_LAUNCHER_SCRATCH(TYPE)                                                               \
     template <>                                                                                    \
-    std::int64_t ormtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::side side,          \
-                                             oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, \
-                                             std::int64_t m, std::int64_t n, std::int64_t lda,     \
-                                             std::int64_t ldc) {                                   \
+    std::int64_t ormtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::side side,         \
+                                             oneapi::math::uplo uplo,                              \
+                                             oneapi::math::transpose trans, std::int64_t m,        \
+                                             std::int64_t n, std::int64_t lda, std::int64_t ldc) { \
         return 0;                                                                                  \
     }
 
@@ -2609,11 +2610,11 @@ ORMTR_LAUNCHER_SCRATCH(double)
 
 #undef ORMTR_LAUNCHER_SCRATCH
 
-#define POTRF_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t potrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define POTRF_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t potrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 POTRF_LAUNCHER_SCRATCH(float)
@@ -2625,7 +2626,7 @@ POTRF_LAUNCHER_SCRATCH(std::complex<double>)
 
 #define POTRS_LAUNCHER_SCRATCH(TYPE)                                                              \
     template <>                                                                                   \
-    std::int64_t potrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo,         \
+    std::int64_t potrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo,        \
                                              std::int64_t n, std::int64_t nrhs, std::int64_t lda, \
                                              std::int64_t ldb) {                                  \
         return 0;                                                                                 \
@@ -2638,11 +2639,11 @@ POTRS_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef POTRS_LAUNCHER_SCRATCH
 
-#define POTRI_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t potri_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define POTRI_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t potri_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 POTRI_LAUNCHER_SCRATCH(float)
@@ -2652,11 +2653,11 @@ POTRI_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef POTRI_LAUNCHER_SCRATCH
 
-#define SYTRF_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t sytrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define SYTRF_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t sytrf_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 SYTRF_LAUNCHER_SCRATCH(float)
@@ -2666,12 +2667,12 @@ SYTRF_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef SYTRF_LAUNCHER_SCRATCH
 
-#define SYEVD_LAUNCHER_SCRATCH(TYPE)                                                     \
-    template <>                                                                          \
-    std::int64_t syevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::job jobz, \
-                                             oneapi::mkl::uplo uplo, std::int64_t n,     \
-                                             std::int64_t lda) {                         \
-        return n;                                                                        \
+#define SYEVD_LAUNCHER_SCRATCH(TYPE)                                                      \
+    template <>                                                                           \
+    std::int64_t syevd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::job jobz, \
+                                             oneapi::math::uplo uplo, std::int64_t n,     \
+                                             std::int64_t lda) {                          \
+        return n;                                                                         \
     }
 
 SYEVD_LAUNCHER_SCRATCH(float)
@@ -2682,7 +2683,7 @@ SYEVD_LAUNCHER_SCRATCH(double)
 #define SYGVD_LAUNCHER_SCRATCH(TYPE)                                                               \
     template <>                                                                                    \
     std::int64_t sygvd_scratchpad_size<TYPE>(sycl::queue & queue, std::int64_t itype,              \
-                                             oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,        \
+                                             oneapi::math::job jobz, oneapi::math::uplo uplo,      \
                                              std::int64_t n, std::int64_t lda, std::int64_t ldb) { \
         return n;                                                                                  \
     }
@@ -2692,11 +2693,11 @@ SYGVD_LAUNCHER_SCRATCH(double)
 
 #undef SYGVD_LAUNCHER_SCRATCH
 
-#define SYTRD_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t sytrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define SYTRD_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t sytrd_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 SYTRD_LAUNCHER_SCRATCH(float)
@@ -2704,13 +2705,13 @@ SYTRD_LAUNCHER_SCRATCH(double)
 
 #undef SYTRD_LAUNCHER_SCRATCH
 
-#define TRTRS_LAUNCHER_SCRATCH(TYPE)                                                               \
-    template <>                                                                                    \
-    std::int64_t trtrs_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo,          \
-                                             oneapi::mkl::transpose trans, oneapi::mkl::diag diag, \
-                                             std::int64_t n, std::int64_t nrhs, std::int64_t lda,  \
-                                             std::int64_t ldb) {                                   \
-        return 0;                                                                                  \
+#define TRTRS_LAUNCHER_SCRATCH(TYPE)                                                  \
+    template <>                                                                       \
+    std::int64_t trtrs_scratchpad_size<TYPE>(                                         \
+        sycl::queue & queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,  \
+        oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, \
+        std::int64_t ldb) {                                                           \
+        return 0;                                                                     \
     }
 
 TRTRS_LAUNCHER_SCRATCH(float)
@@ -2720,12 +2721,12 @@ TRTRS_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef TRTRS_LAUNCHER_SCRATCH
 
-#define UNGBR_LAUNCHER_SCRATCH(TYPE)                                                         \
-    template <>                                                                              \
-    std::int64_t ungbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::generate vec, \
-                                             std::int64_t m, std::int64_t n, std::int64_t k, \
-                                             std::int64_t lda) {                             \
-        return 0;                                                                            \
+#define UNGBR_LAUNCHER_SCRATCH(TYPE)                                                          \
+    template <>                                                                               \
+    std::int64_t ungbr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::generate vec, \
+                                             std::int64_t m, std::int64_t n, std::int64_t k,  \
+                                             std::int64_t lda) {                              \
+        return 0;                                                                             \
     }
 
 UNGBR_LAUNCHER_SCRATCH(std::complex<float>)
@@ -2745,11 +2746,11 @@ UNGQR_LAUNCHER_SCRATCH(std::complex<double>)
 
 #undef UNGQR_LAUNCHER_SCRATCH
 
-#define UNGTR_LAUNCHER_SCRATCH(TYPE)                                                      \
-    template <>                                                                           \
-    std::int64_t ungtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::uplo uplo, \
-                                             std::int64_t n, std::int64_t lda) {          \
-        return 0;                                                                         \
+#define UNGTR_LAUNCHER_SCRATCH(TYPE)                                                       \
+    template <>                                                                            \
+    std::int64_t ungtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::uplo uplo, \
+                                             std::int64_t n, std::int64_t lda) {           \
+        return 0;                                                                          \
     }
 
 UNGTR_LAUNCHER_SCRATCH(std::complex<float>)
@@ -2758,28 +2759,26 @@ UNGTR_LAUNCHER_SCRATCH(std::complex<double>)
 #undef UNGTR_LAUNCHER_SCRATCH
 
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmrq_scratchpad_size<std::complex<float>>(sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
     throw unimplemented("lapack", "unmrq_scratchpad_size");
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<double>>(sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
+std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m,
+    std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {
     throw unimplemented("lapack", "unmrq_scratchpad_size");
 }
 
-#define UNMQR_LAUNCHER_SCRATCH(TYPE)                                                               \
-    template <>                                                                                    \
-    std::int64_t unmqr_scratchpad_size<TYPE>(                                                      \
-        sycl::queue & queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, \
-        std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) {                      \
-        return 0;                                                                                  \
+#define UNMQR_LAUNCHER_SCRATCH(TYPE)                                                          \
+    template <>                                                                               \
+    std::int64_t unmqr_scratchpad_size<TYPE>(                                                 \
+        sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans,          \
+        std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \
+        return 0;                                                                             \
     }
 
 UNMQR_LAUNCHER_SCRATCH(std::complex<float>)
@@ -2789,10 +2788,10 @@ UNMQR_LAUNCHER_SCRATCH(std::complex<double>)
 
 #define UNMTR_LAUNCHER_SCRATCH(TYPE)                                                               \
     template <>                                                                                    \
-    std::int64_t unmtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::mkl::side side,          \
-                                             oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, \
-                                             std::int64_t m, std::int64_t n, std::int64_t lda,     \
-                                             std::int64_t ldc) {                                   \
+    std::int64_t unmtr_scratchpad_size<TYPE>(sycl::queue & queue, oneapi::math::side side,         \
+                                             oneapi::math::uplo uplo,                              \
+                                             oneapi::math::transpose trans, std::int64_t m,        \
+                                             std::int64_t n, std::int64_t lda, std::int64_t ldc) { \
         return 0;                                                                                  \
     }
 
@@ -2803,5 +2802,5 @@ UNMTR_LAUNCHER_SCRATCH(std::complex<double>)
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp
index 559cf1cb6..264515d6e 100644
--- a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp
+++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp
@@ -26,7 +26,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -37,7 +37,7 @@ namespace rocsolver {
  * takes place if no other element in the container has a key equivalent to
  * the one being emplaced (keys in a map container are unique).
  */
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
 thread_local rocsolver_handle<ur_context_handle_t> RocsolverScopedContextHandler::handle_helper =
     rocsolver_handle<ur_context_handle_t>{};
 #else
@@ -100,7 +100,7 @@ rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue& queu
     hipError_t hipErr;
     hipCtx_t desired;
     HIP_ERROR_FUNC(hipDevicePrimaryCtxRetain, hipErr, &desired, hipDevice);
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     auto piPlacedContext_ = reinterpret_cast<ur_context_handle_t>(desired);
 #else
     auto piPlacedContext_ = reinterpret_cast<pi_context>(desired);
@@ -151,5 +151,5 @@ sycl::context RocsolverScopedContextHandler::get_context(const sycl::queue& queu
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp
index 19b26a2df..4d3b12a8c 100644
--- a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp
+++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp
@@ -36,8 +36,8 @@
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -46,7 +46,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -55,7 +55,7 @@ class RocsolverScopedContextHandler {
     sycl::context* placedContext_;
     bool needToRecover_;
     sycl::interop_handle& ih;
-#ifdef ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEMATH_PI_INTERFACE_REMOVED
     static thread_local rocsolver_handle<ur_context_handle_t> handle_helper;
 #else
     static thread_local rocsolver_handle<pi_context> handle_helper;
@@ -80,6 +80,6 @@ class RocsolverScopedContextHandler {
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 #endif //_ROCSOLVER_SCOPED_HANDLE_HPP_
diff --git a/src/lapack/backends/rocsolver/rocsolver_task.hpp b/src/lapack/backends/rocsolver/rocsolver_task.hpp
index b3b89a8b8..c89734bd3 100644
--- a/src/lapack/backends/rocsolver/rocsolver_task.hpp
+++ b/src/lapack/backends/rocsolver/rocsolver_task.hpp
@@ -19,8 +19,8 @@
 *
 **************************************************************************/
 
-#ifndef _MKL_LAPACK_ROCSOLVER_TASK_HPP_
-#define _MKL_LAPACK_ROCSOLVER_TASK_HPP_
+#ifndef ONEMATH_LAPACK_ROCSOLVER_TASK_HPP_
+#define ONEMATH_LAPACK_ROCSOLVER_TASK_HPP_
 #include <hip/hip_runtime.h>
 #include <rocblas/rocblas.h>
 #include <rocsolver/rocsolver.h>
@@ -30,14 +30,14 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "rocsolver_scope_handle.hpp"
 
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
 #if __has_include(<sycl/detail/ur.hpp>)
 #include <sycl/detail/ur.hpp>
-#ifndef ONEMKL_PI_INTERFACE_REMOVED
-#define ONEMKL_PI_INTERFACE_REMOVED
+#ifndef ONEMATH_PI_INTERFACE_REMOVED
+#define ONEMATH_PI_INTERFACE_REMOVED
 #endif
 #elif __has_include(<sycl/detail/pi.hpp>)
 #include <sycl/detail/pi.hpp>
@@ -46,7 +46,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace rocsolver {
 
@@ -63,12 +63,12 @@ static inline void host_task_internal(H& cgh, sycl::queue queue, F f) {
 }
 
 template <typename H, typename F>
-static inline void onemkl_rocsolver_host_task(H& cgh, sycl::queue queue, F f) {
+static inline void onemath_rocsolver_host_task(H& cgh, sycl::queue queue, F f) {
     (void)host_task_internal(cgh, queue, f);
 }
 
 } // namespace rocsolver
 } // namespace lapack
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
-#endif // _MKL_LAPACK_ROCSOLVER_TASK_HPP_
+#endif // ONEMATH_LAPACK_ROCSOLVER_TASK_HPP_
diff --git a/src/lapack/backends/rocsolver/rocsolver_wrappers.cpp b/src/lapack/backends/rocsolver/rocsolver_wrappers.cpp
index 8613cc05e..ac383582f 100644
--- a/src/lapack/backends/rocsolver/rocsolver_wrappers.cpp
+++ b/src/lapack/backends/rocsolver/rocsolver_wrappers.cpp
@@ -19,410 +19,410 @@
 *
 **************************************************************************/
 #include "lapack/function_table.hpp"
-#include "oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hpp"
+#include "oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT lapack_function_table_t mkl_lapack_table = {
+extern "C" ONEMATH_EXPORT lapack_function_table_t onemath_lapack_table = {
     WRAPPER_VERSION,
 #define LAPACK_BACKEND rocsolver
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::heevd,
-    oneapi::mkl::lapack::rocsolver::heevd,
-    oneapi::mkl::lapack::rocsolver::hegvd,
-    oneapi::mkl::lapack::rocsolver::hegvd,
-    oneapi::mkl::lapack::rocsolver::hetrd,
-    oneapi::mkl::lapack::rocsolver::hetrd,
-    oneapi::mkl::lapack::rocsolver::hetrf,
-    oneapi::mkl::lapack::rocsolver::hetrf,
-    oneapi::mkl::lapack::rocsolver::orgbr,
-    oneapi::mkl::lapack::rocsolver::orgbr,
-    oneapi::mkl::lapack::rocsolver::orgqr,
-    oneapi::mkl::lapack::rocsolver::orgqr,
-    oneapi::mkl::lapack::rocsolver::orgtr,
-    oneapi::mkl::lapack::rocsolver::orgtr,
-    oneapi::mkl::lapack::rocsolver::ormtr,
-    oneapi::mkl::lapack::rocsolver::ormtr,
-    oneapi::mkl::lapack::rocsolver::ormrq,
-    oneapi::mkl::lapack::rocsolver::ormrq,
-    oneapi::mkl::lapack::rocsolver::ormqr,
-    oneapi::mkl::lapack::rocsolver::ormqr,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::syevd,
-    oneapi::mkl::lapack::rocsolver::syevd,
-    oneapi::mkl::lapack::rocsolver::sygvd,
-    oneapi::mkl::lapack::rocsolver::sygvd,
-    oneapi::mkl::lapack::rocsolver::sytrd,
-    oneapi::mkl::lapack::rocsolver::sytrd,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::ungbr,
-    oneapi::mkl::lapack::rocsolver::ungbr,
-    oneapi::mkl::lapack::rocsolver::ungqr,
-    oneapi::mkl::lapack::rocsolver::ungqr,
-    oneapi::mkl::lapack::rocsolver::ungtr,
-    oneapi::mkl::lapack::rocsolver::ungtr,
-    oneapi::mkl::lapack::rocsolver::unmrq,
-    oneapi::mkl::lapack::rocsolver::unmrq,
-    oneapi::mkl::lapack::rocsolver::unmqr,
-    oneapi::mkl::lapack::rocsolver::unmqr,
-    oneapi::mkl::lapack::rocsolver::unmtr,
-    oneapi::mkl::lapack::rocsolver::unmtr,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gebrd,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::gerqf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::geqrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getrf,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getri,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::getrs,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::gesvd,
-    oneapi::mkl::lapack::rocsolver::heevd,
-    oneapi::mkl::lapack::rocsolver::heevd,
-    oneapi::mkl::lapack::rocsolver::hegvd,
-    oneapi::mkl::lapack::rocsolver::hegvd,
-    oneapi::mkl::lapack::rocsolver::hetrd,
-    oneapi::mkl::lapack::rocsolver::hetrd,
-    oneapi::mkl::lapack::rocsolver::hetrf,
-    oneapi::mkl::lapack::rocsolver::hetrf,
-    oneapi::mkl::lapack::rocsolver::orgbr,
-    oneapi::mkl::lapack::rocsolver::orgbr,
-    oneapi::mkl::lapack::rocsolver::orgqr,
-    oneapi::mkl::lapack::rocsolver::orgqr,
-    oneapi::mkl::lapack::rocsolver::orgtr,
-    oneapi::mkl::lapack::rocsolver::orgtr,
-    oneapi::mkl::lapack::rocsolver::ormtr,
-    oneapi::mkl::lapack::rocsolver::ormtr,
-    oneapi::mkl::lapack::rocsolver::ormrq,
-    oneapi::mkl::lapack::rocsolver::ormrq,
-    oneapi::mkl::lapack::rocsolver::ormqr,
-    oneapi::mkl::lapack::rocsolver::ormqr,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potrf,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potri,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::potrs,
-    oneapi::mkl::lapack::rocsolver::syevd,
-    oneapi::mkl::lapack::rocsolver::syevd,
-    oneapi::mkl::lapack::rocsolver::sygvd,
-    oneapi::mkl::lapack::rocsolver::sygvd,
-    oneapi::mkl::lapack::rocsolver::sytrd,
-    oneapi::mkl::lapack::rocsolver::sytrd,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::sytrf,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::trtrs,
-    oneapi::mkl::lapack::rocsolver::ungbr,
-    oneapi::mkl::lapack::rocsolver::ungbr,
-    oneapi::mkl::lapack::rocsolver::ungqr,
-    oneapi::mkl::lapack::rocsolver::ungqr,
-    oneapi::mkl::lapack::rocsolver::ungtr,
-    oneapi::mkl::lapack::rocsolver::ungtr,
-    oneapi::mkl::lapack::rocsolver::unmrq,
-    oneapi::mkl::lapack::rocsolver::unmrq,
-    oneapi::mkl::lapack::rocsolver::unmqr,
-    oneapi::mkl::lapack::rocsolver::unmqr,
-    oneapi::mkl::lapack::rocsolver::unmtr,
-    oneapi::mkl::lapack::rocsolver::unmtr,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getrf_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getri_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::getrs_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrf_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::potrs_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch,
-    oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::gesvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::gesvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::gesvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::gesvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getri_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getri_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::heevd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::heevd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::hegvd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::hegvd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::hetrd_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::hetrd_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::hetrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::hetrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::orgbr_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::orgbr_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::orgtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::orgtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::orgqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::orgqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::ormrq_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::ormrq_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::ormqr_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::ormqr_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::ormtr_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::ormtr_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::potri_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potri_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potri_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potri_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::syevd_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::syevd_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::sygvd_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::sygvd_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::sytrd_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::sytrd_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::trtrs_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::trtrs_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::trtrs_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::trtrs_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::ungbr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::ungbr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::ungqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::ungqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::ungtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::ungtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::unmrq_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::unmrq_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::unmqr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::unmqr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::unmtr_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::unmtr_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<float>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<double>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<double>>,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<float>>,
-    oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<double>>
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::heevd,
+    oneapi::math::lapack::rocsolver::heevd,
+    oneapi::math::lapack::rocsolver::hegvd,
+    oneapi::math::lapack::rocsolver::hegvd,
+    oneapi::math::lapack::rocsolver::hetrd,
+    oneapi::math::lapack::rocsolver::hetrd,
+    oneapi::math::lapack::rocsolver::hetrf,
+    oneapi::math::lapack::rocsolver::hetrf,
+    oneapi::math::lapack::rocsolver::orgbr,
+    oneapi::math::lapack::rocsolver::orgbr,
+    oneapi::math::lapack::rocsolver::orgqr,
+    oneapi::math::lapack::rocsolver::orgqr,
+    oneapi::math::lapack::rocsolver::orgtr,
+    oneapi::math::lapack::rocsolver::orgtr,
+    oneapi::math::lapack::rocsolver::ormtr,
+    oneapi::math::lapack::rocsolver::ormtr,
+    oneapi::math::lapack::rocsolver::ormrq,
+    oneapi::math::lapack::rocsolver::ormrq,
+    oneapi::math::lapack::rocsolver::ormqr,
+    oneapi::math::lapack::rocsolver::ormqr,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::syevd,
+    oneapi::math::lapack::rocsolver::syevd,
+    oneapi::math::lapack::rocsolver::sygvd,
+    oneapi::math::lapack::rocsolver::sygvd,
+    oneapi::math::lapack::rocsolver::sytrd,
+    oneapi::math::lapack::rocsolver::sytrd,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::ungbr,
+    oneapi::math::lapack::rocsolver::ungbr,
+    oneapi::math::lapack::rocsolver::ungqr,
+    oneapi::math::lapack::rocsolver::ungqr,
+    oneapi::math::lapack::rocsolver::ungtr,
+    oneapi::math::lapack::rocsolver::ungtr,
+    oneapi::math::lapack::rocsolver::unmrq,
+    oneapi::math::lapack::rocsolver::unmrq,
+    oneapi::math::lapack::rocsolver::unmqr,
+    oneapi::math::lapack::rocsolver::unmqr,
+    oneapi::math::lapack::rocsolver::unmtr,
+    oneapi::math::lapack::rocsolver::unmtr,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gebrd,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::gerqf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::geqrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getrf,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getri,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::getrs,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::gesvd,
+    oneapi::math::lapack::rocsolver::heevd,
+    oneapi::math::lapack::rocsolver::heevd,
+    oneapi::math::lapack::rocsolver::hegvd,
+    oneapi::math::lapack::rocsolver::hegvd,
+    oneapi::math::lapack::rocsolver::hetrd,
+    oneapi::math::lapack::rocsolver::hetrd,
+    oneapi::math::lapack::rocsolver::hetrf,
+    oneapi::math::lapack::rocsolver::hetrf,
+    oneapi::math::lapack::rocsolver::orgbr,
+    oneapi::math::lapack::rocsolver::orgbr,
+    oneapi::math::lapack::rocsolver::orgqr,
+    oneapi::math::lapack::rocsolver::orgqr,
+    oneapi::math::lapack::rocsolver::orgtr,
+    oneapi::math::lapack::rocsolver::orgtr,
+    oneapi::math::lapack::rocsolver::ormtr,
+    oneapi::math::lapack::rocsolver::ormtr,
+    oneapi::math::lapack::rocsolver::ormrq,
+    oneapi::math::lapack::rocsolver::ormrq,
+    oneapi::math::lapack::rocsolver::ormqr,
+    oneapi::math::lapack::rocsolver::ormqr,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potrf,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potri,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::potrs,
+    oneapi::math::lapack::rocsolver::syevd,
+    oneapi::math::lapack::rocsolver::syevd,
+    oneapi::math::lapack::rocsolver::sygvd,
+    oneapi::math::lapack::rocsolver::sygvd,
+    oneapi::math::lapack::rocsolver::sytrd,
+    oneapi::math::lapack::rocsolver::sytrd,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::sytrf,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::trtrs,
+    oneapi::math::lapack::rocsolver::ungbr,
+    oneapi::math::lapack::rocsolver::ungbr,
+    oneapi::math::lapack::rocsolver::ungqr,
+    oneapi::math::lapack::rocsolver::ungqr,
+    oneapi::math::lapack::rocsolver::ungtr,
+    oneapi::math::lapack::rocsolver::ungtr,
+    oneapi::math::lapack::rocsolver::unmrq,
+    oneapi::math::lapack::rocsolver::unmrq,
+    oneapi::math::lapack::rocsolver::unmqr,
+    oneapi::math::lapack::rocsolver::unmqr,
+    oneapi::math::lapack::rocsolver::unmtr,
+    oneapi::math::lapack::rocsolver::unmtr,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::geqrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getrf_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getri_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::getrs_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::orgqr_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrf_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::potrs_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::ungqr_batch,
+    oneapi::math::lapack::rocsolver::gebrd_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::gebrd_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::gebrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::gebrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::gerqf_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::gerqf_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::gerqf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::gerqf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::geqrf_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::geqrf_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::geqrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::geqrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::gesvd_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::gesvd_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::gesvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::gesvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrf_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrf_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getri_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getri_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrs_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrs_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::heevd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::heevd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::hegvd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::hegvd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::hetrd_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::hetrd_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::hetrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::hetrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::orgbr_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::orgbr_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::orgtr_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::orgtr_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::orgqr_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::orgqr_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::ormrq_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::ormrq_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::ormqr_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::ormqr_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::ormtr_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::ormtr_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrf_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrf_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::potrs_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrs_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::potri_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potri_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potri_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potri_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::sytrf_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::sytrf_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::sytrf_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::sytrf_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::syevd_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::syevd_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::sygvd_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::sygvd_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::sytrd_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::sytrd_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::trtrs_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::trtrs_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::trtrs_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::trtrs_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::ungbr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::ungbr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::ungqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::ungqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::ungtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::ungtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::unmrq_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::unmrq_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::unmqr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::unmqr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::unmtr_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::unmtr_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<float>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<double>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size<std::complex<double>>,
+    oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<float>>,
+    oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size<std::complex<double>>
 #undef LAPACK_BACKEND
 };
diff --git a/src/lapack/function_table.hpp b/src/lapack/function_table.hpp
index dee8b8d8e..93c56450f 100644
--- a/src/lapack/function_table.hpp
+++ b/src/lapack/function_table.hpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 typedef struct {
     int version;
@@ -116,95 +116,95 @@ typedef struct {
                         std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*cgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
                         std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*dgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b, std::int64_t ldb,
                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*sgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*sgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*zgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*zgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& b,
                         std::int64_t ldb, sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+    void (*dgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                         std::int64_t m, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                         sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
                         sycl::buffer<double>& vt, std::int64_t ldvt,
                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*sgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+    void (*sgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                         std::int64_t m, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                         sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
                         sycl::buffer<float>& vt, std::int64_t ldvt, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+    void (*cgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<float>>& a,
                         std::int64_t lda, sycl::buffer<float>& s,
                         sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
                         sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+    void (*zgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                         std::int64_t m, std::int64_t n, sycl::buffer<std::complex<double>>& a,
                         std::int64_t lda, sycl::buffer<double>& s,
                         sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
                         sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cheevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+    void (*cheevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                         std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zheevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+    void (*zheevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                         std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*chegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                        oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*chegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                        oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                         sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zhegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                        oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zhegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                        oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                         sycl::buffer<double>& w, sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*chetrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*chetrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<float>& d, sycl::buffer<float>& e,
                         sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zhetrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zhetrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<double>& d, sycl::buffer<double>& e,
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*chetrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*chetrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zhetrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zhetrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*sorgbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    void (*sorgbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                         std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
                         sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dorgbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    void (*dorgbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                         std::int64_t n, std::int64_t k, sycl::buffer<double>& a, std::int64_t lda,
                         sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
@@ -214,158 +214,158 @@ typedef struct {
     void (*sorgqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*sorgtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*sorgtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*dorgtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dorgtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*sormtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                        oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    void (*sormtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                        oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
                         sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dormtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                        oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    void (*dormtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                        oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
                         sycl::buffer<double>& c, std::int64_t ldc, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*sormrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*sormrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dormrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*dormrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dormqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*dormqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
                         std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& c,
                         std::int64_t ldc, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*sormqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*sormqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a,
                         std::int64_t lda, sycl::buffer<float>& tau, sycl::buffer<float>& c,
                         std::int64_t ldc, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*spotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*spotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*spotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*spotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*spotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*spotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                         sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                         sycl::buffer<double>& b, std::int64_t ldb, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dsyevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+    void (*dsyevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                         std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*ssyevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+    void (*ssyevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo,
                         std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dsygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                        oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+    void (*dsygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                        oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
                         std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
                         sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*ssygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                        oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+    void (*ssygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                        oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
                         std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
                         sycl::buffer<float>& w, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dsytrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dsytrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
                         sycl::buffer<double>& e, sycl::buffer<double>& tau,
                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*ssytrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*ssytrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d,
                         sycl::buffer<float>& e, sycl::buffer<float>& tau,
                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*ssytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*ssytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*dsytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dsytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*csytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*csytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zsytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zsytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::int64_t>& ipiv,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*ctrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+    void (*ctrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                        oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*dtrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+    void (*dtrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                        oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
                         std::int64_t ldb, sycl::buffer<double>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*strtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+    void (*strtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                        oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
                         std::int64_t ldb, sycl::buffer<float>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*ztrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                        oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
+    void (*ztrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                        oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cungbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    void (*cungbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
                         std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zungbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    void (*zungbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                         std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
                         std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& scratchpad,
@@ -380,53 +380,53 @@ typedef struct {
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cungtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cungtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zungtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zungtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cunmrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*cunmrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zunmrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*zunmrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cunmqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*cunmqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zunmqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans,
+    void (*zunmqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans,
                         std::int64_t m, std::int64_t n, std::int64_t k,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
                         sycl::buffer<std::complex<double>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*cunmtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                        oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    void (*cunmtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                        oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                         sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<float>>& tau,
                         sycl::buffer<std::complex<float>>& c, std::int64_t ldc,
                         sycl::buffer<std::complex<float>>& scratchpad,
                         std::int64_t scratchpad_size);
-    void (*zunmtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                        oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    void (*zunmtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo,
+                        oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                         sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                         sycl::buffer<std::complex<double>>& tau,
                         sycl::buffer<std::complex<double>>& c, std::int64_t ldc,
@@ -518,97 +518,99 @@ typedef struct {
                                    std::int64_t lda, std::int64_t* ipiv,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
-                                   std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
-                                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+    sycl::event (*cgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
+                                   std::int64_t lda, std::int64_t* ipiv, std::complex<float>* b,
+                                   std::int64_t ldb, std::complex<float>* scratchpad,
+                                   std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, double* a, std::int64_t lda,
+    sycl::event (*dgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
                                    std::int64_t* ipiv, double* b, std::int64_t ldb,
                                    double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, float* a, std::int64_t lda,
+    sycl::event (*sgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
                                    std::int64_t* ipiv, float* b, std::int64_t ldb,
                                    float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
-                                   std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
-                                   std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
-                                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
+    sycl::event (*zgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
+                                   std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
+                                   std::int64_t lda, std::int64_t* ipiv, std::complex<double>* b,
+                                   std::int64_t ldb, std::complex<double>* scratchpad,
+                                   std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+    sycl::event (*dgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    double* a, std::int64_t lda, double* s, double* u,
                                    std::int64_t ldu, double* vt, std::int64_t ldvt,
                                    double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+    sycl::event (*sgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu,
                                    float* vt, std::int64_t ldvt, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+    sycl::event (*cgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda, float* s,
                                    std::complex<float>* u, std::int64_t ldu,
                                    std::complex<float>* vt, std::int64_t ldvt,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                   oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+    sycl::event (*zgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                   oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda, double* s,
                                    std::complex<double>* u, std::int64_t ldu,
                                    std::complex<double>* vt, std::int64_t ldvt,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cheevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+    sycl::event (*cheevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                    std::int64_t lda, float* w, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zheevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+    sycl::event (*zheevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                    std::int64_t lda, double* w, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*chegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a,
+    sycl::event (*chegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a,
                                    std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                                    float* w, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zhegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a,
+    sycl::event (*zhegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
                                    std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                                    double* w, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*chetrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*chetrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda, float* d, float* e,
                                    std::complex<float>* tau, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zhetrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zhetrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda, double* d, double* e,
                                    std::complex<double>* tau, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*chetrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*chetrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zhetrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zhetrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sorgbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    sycl::event (*sorgbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                    std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                                    float* tau, float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dorgbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    sycl::event (*dorgbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                    std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                                    double* tau, double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
@@ -620,176 +622,176 @@ typedef struct {
                                    std::int64_t k, float* a, std::int64_t lda, float* tau,
                                    float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sorgtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*sorgtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    float* a, std::int64_t lda, float* tau, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dorgtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dorgtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    double* a, std::int64_t lda, double* tau, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sormtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+    sycl::event (*sormtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, float* a, std::int64_t lda,
                                    float* tau, float* c, std::int64_t ldc, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dormtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+    sycl::event (*dormtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, double* a, std::int64_t lda,
                                    double* tau, double* c, std::int64_t ldc, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sormrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*sormrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
                                    std::int64_t ldc, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dormrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*dormrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, double* a, std::int64_t lda, double* tau,
                                    double* c, std::int64_t ldc, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dormqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*dormqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, double* a, std::int64_t lda, double* tau,
                                    double* c, std::int64_t ldc, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*sormqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*sormqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, float* a, std::int64_t lda, float* tau, float* c,
                                    std::int64_t ldc, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*spotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    float* a, std::int64_t lda, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    double* a, std::int64_t lda, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*cpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*spotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    float* a, std::int64_t lda, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    double* a, std::int64_t lda, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*cpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*spotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t nrhs, float* a, std::int64_t lda, float* b,
                                    std::int64_t ldb, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t nrhs, double* a, std::int64_t lda, double* b,
                                    std::int64_t ldb, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*cpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* b, std::int64_t ldb,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* b, std::int64_t ldb,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dsyevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+    sycl::event (*dsyevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, double* a,
                                    std::int64_t lda, double* w, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ssyevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+    sycl::event (*ssyevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, float* a,
                                    std::int64_t lda, float* w, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dsygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+    sycl::event (*dsygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, double* a,
                                    std::int64_t lda, double* b, std::int64_t ldb, double* w,
                                    double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ssygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz,
-                                   oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+    sycl::event (*ssygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz,
+                                   oneapi::math::uplo uplo, std::int64_t n, float* a,
                                    std::int64_t lda, float* b, std::int64_t ldb, float* w,
                                    float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dsytrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dsytrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    double* a, std::int64_t lda, double* d, double* e, double* tau,
                                    double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ssytrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*ssytrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    float* a, std::int64_t lda, float* d, float* e, float* tau,
                                    float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ssytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*ssytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    float* a, std::int64_t lda, std::int64_t* ipiv,
                                    float* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dsytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*dsytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    double* a, std::int64_t lda, std::int64_t* ipiv,
                                    double* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*csytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*csytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zsytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zsytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ctrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+    sycl::event (*ctrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
                                    std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
                                    std::int64_t lda, std::complex<float>* b, std::int64_t ldb,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*dtrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+    sycl::event (*dtrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
                                    std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
                                    double* b, std::int64_t ldb, double* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*strtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+    sycl::event (*strtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
                                    std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
                                    float* b, std::int64_t ldb, float* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*ztrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                   oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
+    sycl::event (*ztrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                   oneapi::math::transpose trans, oneapi::math::diag diag,
                                    std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
                                    std::int64_t lda, std::complex<double>* b, std::int64_t ldb,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cungbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    sycl::event (*cungbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::complex<float>* a,
                                    std::int64_t lda, std::complex<float>* tau,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zungbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m,
+    sycl::event (*zungbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m,
                                    std::int64_t n, std::int64_t k, std::complex<double>* a,
                                    std::int64_t lda, std::complex<double>* tau,
                                    std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -804,53 +806,53 @@ typedef struct {
                                    std::complex<double>* tau, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cungtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*cungtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* tau, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zungtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    sycl::event (*zungtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                                    std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* tau, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cunmrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*cunmrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* tau, std::complex<float>* c,
                                    std::int64_t ldc, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zunmrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*zunmrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* tau, std::complex<double>* c,
                                    std::int64_t ldc, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cunmqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*cunmqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::complex<float>* a, std::int64_t lda,
                                    std::complex<float>* tau, std::complex<float>* c,
                                    std::int64_t ldc, std::complex<float>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zunmqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+    sycl::event (*zunmqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
                                    std::int64_t k, std::complex<double>* a, std::int64_t lda,
                                    std::complex<double>* tau, std::complex<double>* c,
                                    std::int64_t ldc, std::complex<double>* scratchpad,
                                    std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*cunmtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+    sycl::event (*cunmtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::complex<float>* a,
                                    std::int64_t lda, std::complex<float>* tau,
                                    std::complex<float>* c, std::int64_t ldc,
                                    std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                                    const std::vector<sycl::event>& dependencies);
-    sycl::event (*zunmtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                   oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
+    sycl::event (*zunmtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                   oneapi::math::uplo uplo, oneapi::math::transpose trans,
                                    std::int64_t m, std::int64_t n, std::complex<double>* a,
                                    std::int64_t lda, std::complex<double>* tau,
                                    std::complex<double>* c, std::int64_t ldc,
@@ -900,19 +902,19 @@ typedef struct {
                               std::int64_t stride_ipiv, std::int64_t batch_size,
                               sycl::buffer<std::complex<double>>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*sgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*sgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                               std::int64_t stride_ipiv, sycl::buffer<float>& b, std::int64_t ldb,
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*dgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*dgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                               std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                               std::int64_t stride_ipiv, sycl::buffer<double>& b, std::int64_t ldb,
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*cgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*cgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                               std::int64_t lda, std::int64_t stride_a,
                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
@@ -920,7 +922,7 @@ typedef struct {
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<std::complex<float>>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*zgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    void (*zgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                               std::int64_t lda, std::int64_t stride_a,
                               sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
@@ -960,42 +962,42 @@ typedef struct {
                               sycl::buffer<double>& tau, std::int64_t stride_tau,
                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*spotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*spotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                               std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*dpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                               std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*cpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                               std::int64_t stride_a, std::int64_t batch_size,
                               sycl::buffer<std::complex<float>>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*zpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                               std::int64_t stride_a, std::int64_t batch_size,
                               sycl::buffer<std::complex<double>>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*spotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*spotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                               std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size);
-    void (*dpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*dpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                               std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size);
-    void (*cpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*cpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                               std::int64_t lda, std::int64_t stride_a,
                               sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
                               std::int64_t stride_b, std::int64_t batch_size,
                               sycl::buffer<std::complex<float>>& scratchpad,
                               std::int64_t scratchpad_size);
-    void (*zpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    void (*zpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
                               std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                               std::int64_t lda, std::int64_t stride_a,
                               sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
@@ -1091,7 +1093,7 @@ typedef struct {
                                          std::complex<double>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*sgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*sgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t n, std::int64_t nrhs, float* a,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t* ipiv, std::int64_t stride_ipiv, float* b,
@@ -1099,7 +1101,7 @@ typedef struct {
                                          std::int64_t batch_size, float* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                          std::int64_t n, std::int64_t nrhs, double* a,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::int64_t* ipiv, std::int64_t stride_ipiv, double* b,
@@ -1108,13 +1110,13 @@ typedef struct {
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
     sycl::event (*cgetrs_batch_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
         std::int64_t stride_ipiv, std::complex<float>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
         const std::vector<sycl::event>& dependencies);
     sycl::event (*zgetrs_batch_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
         std::int64_t stride_ipiv, std::complex<double>* b, std::int64_t ldb, std::int64_t stride_b,
         std::int64_t batch_size, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1131,50 +1133,52 @@ typedef struct {
                                          std::int64_t stride_tau, std::int64_t batch_size,
                                          double* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         float* a, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t batch_size, float* scratchpad,
-                                         std::int64_t scratchpad_size,
+    sycl::event (*spotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, float* a, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t batch_size,
+                                         float* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         double* a, std::int64_t lda, std::int64_t stride_a,
-                                         std::int64_t batch_size, double* scratchpad,
-                                         std::int64_t scratchpad_size,
+    sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, double* a, std::int64_t lda,
+                                         std::int64_t stride_a, std::int64_t batch_size,
+                                         double* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::complex<float>* a, std::int64_t lda,
+    sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::complex<float>* a, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t batch_size,
                                          std::complex<float>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::complex<double>* a, std::int64_t lda,
+    sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::complex<double>* a, std::int64_t lda,
                                          std::int64_t stride_a, std::int64_t batch_size,
                                          std::complex<double>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, float* a, std::int64_t lda,
-                                         std::int64_t stride_a, float* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         float* scratchpad, std::int64_t scratchpad_size,
+    sycl::event (*spotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, float* a,
+                                         std::int64_t lda, std::int64_t stride_a, float* b,
+                                         std::int64_t ldb, std::int64_t stride_b,
+                                         std::int64_t batch_size, float* scratchpad,
+                                         std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, double* a, std::int64_t lda,
-                                         std::int64_t stride_a, double* b, std::int64_t ldb,
-                                         std::int64_t stride_b, std::int64_t batch_size,
-                                         double* scratchpad, std::int64_t scratchpad_size,
+    sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, double* a,
+                                         std::int64_t lda, std::int64_t stride_a, double* b,
+                                         std::int64_t ldb, std::int64_t stride_b,
+                                         std::int64_t batch_size, double* scratchpad,
+                                         std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, std::complex<float>* a,
+    sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, std::complex<float>* a,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::complex<float>* b, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size,
                                          std::complex<float>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
-                                         std::int64_t nrhs, std::complex<double>* a,
+    sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                         std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
                                          std::int64_t lda, std::int64_t stride_a,
                                          std::complex<double>* b, std::int64_t ldb,
                                          std::int64_t stride_b, std::int64_t batch_size,
@@ -1264,21 +1268,21 @@ typedef struct {
                                          std::complex<double>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*sgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans,
+    sycl::event (*sgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans,
                                          std::int64_t* n, std::int64_t* nrhs, float** a,
                                          std::int64_t* lda, std::int64_t** ipiv, float** b,
                                          std::int64_t* ldb, std::int64_t group_count,
                                          std::int64_t* group_sizes, float* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans,
+    sycl::event (*dgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans,
                                          std::int64_t* n, std::int64_t* nrhs, double** a,
                                          std::int64_t* lda, std::int64_t** ipiv, double** b,
                                          std::int64_t* ldb, std::int64_t group_count,
                                          std::int64_t* group_sizes, double* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*cgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans,
+    sycl::event (*cgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans,
                                          std::int64_t* n, std::int64_t* nrhs,
                                          std::complex<float>** a, std::int64_t* lda,
                                          std::int64_t** ipiv, std::complex<float>** b,
@@ -1287,7 +1291,7 @@ typedef struct {
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
     sycl::event (*zgetrs_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
         std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv, std::complex<double>** b,
         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1303,48 +1307,48 @@ typedef struct {
                                          std::int64_t* group_sizes, double* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*spotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, float** a, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes,
                                          float* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*dpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, double** a, std::int64_t* lda,
                                          std::int64_t group_count, std::int64_t* group_sizes,
                                          double* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*cpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*cpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::complex<float>** a,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes, std::complex<float>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*zpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*zpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::complex<double>** a,
                                          std::int64_t* lda, std::int64_t group_count,
                                          std::int64_t* group_sizes,
                                          std::complex<double>* scratchpad,
                                          std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*spotrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*spotrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::int64_t* nrhs, float** a,
                                          std::int64_t* lda, float** b, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes,
                                          float* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
-    sycl::event (*dpotrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    sycl::event (*dpotrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                          std::int64_t* n, std::int64_t* nrhs, double** a,
                                          std::int64_t* lda, double** b, std::int64_t* ldb,
                                          std::int64_t group_count, std::int64_t* group_sizes,
                                          double* scratchpad, std::int64_t scratchpad_size,
                                          const std::vector<sycl::event>& dependencies);
     sycl::event (*cpotrs_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
         std::complex<float>** a, std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* group_sizes, std::complex<float>* scratchpad,
         std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies);
     sycl::event (*zpotrs_group_usm_sycl)(
-        sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs,
         std::complex<double>** a, std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
         std::int64_t group_count, std::int64_t* group_sizes, std::complex<double>* scratchpad,
         std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies);
@@ -1387,20 +1391,20 @@ typedef struct {
                                                 std::int64_t lda);
     std::int64_t (*zgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t lda);
-    std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                oneapi::mkl::jobsvd jobvt, std::int64_t m,
+    std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                oneapi::math::jobsvd jobvt, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
                                                 std::int64_t ldvt);
-    std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                oneapi::mkl::jobsvd jobvt, std::int64_t m,
+    std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                oneapi::math::jobsvd jobvt, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
                                                 std::int64_t ldvt);
-    std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                oneapi::mkl::jobsvd jobvt, std::int64_t m,
+    std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                oneapi::math::jobsvd jobvt, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
                                                 std::int64_t ldvt);
-    std::int64_t (*zgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                                                oneapi::mkl::jobsvd jobvt, std::int64_t m,
+    std::int64_t (*zgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu,
+                                                oneapi::math::jobsvd jobvt, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldu,
                                                 std::int64_t ldvt);
     std::int64_t (*sgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
@@ -1419,185 +1423,185 @@ typedef struct {
                                                 std::int64_t lda);
     std::int64_t (*zgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n,
                                                 std::int64_t lda);
-    std::int64_t (*sgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    std::int64_t (*sgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans,
+    std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+    std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t lda);
-    std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+    std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t lda);
     std::int64_t (*chegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype,
-                                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                                oneapi::math::job jobz, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldb);
     std::int64_t (*zhegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype,
-                                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                                oneapi::math::job jobz, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldb);
-    std::int64_t (*chetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*chetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect,
+    std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect,
                                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                                 std::int64_t lda);
-    std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect,
+    std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect,
                                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                                 std::int64_t lda);
-    std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
     std::int64_t (*sorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t k, std::int64_t lda);
     std::int64_t (*dorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t k, std::int64_t lda);
-    std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldc);
-    std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldc);
-    std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*spotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*spotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+    std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t lda);
-    std::int64_t (*dsyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+    std::int64_t (*dsyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t lda);
     std::int64_t (*ssygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype,
-                                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                                oneapi::math::job jobz, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldb);
     std::int64_t (*dsygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype,
-                                                oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+                                                oneapi::math::job jobz, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldb);
-    std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans,
-                                                oneapi::mkl::diag diag, std::int64_t n,
+    std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans,
+                                                oneapi::math::diag diag, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans,
-                                                oneapi::mkl::diag diag, std::int64_t n,
+    std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans,
+                                                oneapi::math::diag diag, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans,
-                                                oneapi::mkl::diag diag, std::int64_t n,
+    std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans,
+                                                oneapi::math::diag diag, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans,
-                                                oneapi::mkl::diag diag, std::int64_t n,
+    std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans,
+                                                oneapi::math::diag diag, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t ldb);
-    std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect,
+    std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect,
                                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                                 std::int64_t lda);
-    std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect,
+    std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect,
                                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                                 std::int64_t lda);
     std::int64_t (*cungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t k, std::int64_t lda);
     std::int64_t (*zungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n,
                                                 std::int64_t k, std::int64_t lda);
-    std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                 std::int64_t n, std::int64_t lda);
-    std::int64_t (*cunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*cunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t k, std::int64_t lda,
                                                 std::int64_t ldc);
-    std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldc);
-    std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side,
-                                                oneapi::mkl::uplo uplo,
-                                                oneapi::mkl::transpose trans, std::int64_t m,
+    std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side,
+                                                oneapi::math::uplo uplo,
+                                                oneapi::math::transpose trans, std::int64_t m,
                                                 std::int64_t n, std::int64_t lda, std::int64_t ldc);
     std::int64_t (*sgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m,
                                                       std::int64_t n, std::int64_t lda,
@@ -1636,19 +1640,19 @@ typedef struct {
                                                       std::int64_t stride_ipiv,
                                                       std::int64_t batch_size);
     std::int64_t (*sgetrs_batch_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     std::int64_t (*dgetrs_batch_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     std::int64_t (*cgetrs_batch_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     std::int64_t (*zgetrs_batch_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+        sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
         std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb,
         std::int64_t stride_b, std::int64_t batch_size);
     std::int64_t (*sgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m,
@@ -1671,38 +1675,38 @@ typedef struct {
                                                       std::int64_t stride_a,
                                                       std::int64_t stride_tau,
                                                       std::int64_t batch_size);
-    std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t lda,
                                                       std::int64_t stride_a,
                                                       std::int64_t batch_size);
-    std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t lda,
                                                       std::int64_t stride_a,
                                                       std::int64_t batch_size);
-    std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t lda,
                                                       std::int64_t stride_a,
                                                       std::int64_t batch_size);
-    std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t lda,
                                                       std::int64_t stride_a,
                                                       std::int64_t batch_size);
-    std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t nrhs,
                                                       std::int64_t lda, std::int64_t stride_a,
                                                       std::int64_t ldb, std::int64_t stride_b,
                                                       std::int64_t batch_size);
-    std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t nrhs,
                                                       std::int64_t lda, std::int64_t stride_a,
                                                       std::int64_t ldb, std::int64_t stride_b,
                                                       std::int64_t batch_size);
-    std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t nrhs,
                                                       std::int64_t lda, std::int64_t stride_a,
                                                       std::int64_t ldb, std::int64_t stride_b,
                                                       std::int64_t batch_size);
-    std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo,
+    std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo,
                                                       std::int64_t n, std::int64_t nrhs,
                                                       std::int64_t lda, std::int64_t stride_a,
                                                       std::int64_t ldb, std::int64_t stride_b,
@@ -1756,16 +1760,16 @@ typedef struct {
                                                       std::int64_t* lda, std::int64_t group_count,
                                                       std::int64_t* group_sizes);
     std::int64_t (*sgetrs_group_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
         std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
     std::int64_t (*dgetrs_group_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
         std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
     std::int64_t (*cgetrs_group_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
         std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
     std::int64_t (*zgetrs_group_scratchpad_size_sycl)(
-        sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+        sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
         std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes);
     std::int64_t (*sgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m,
                                                       std::int64_t* n, std::int64_t* lda,
@@ -1791,38 +1795,38 @@ typedef struct {
                                                       std::int64_t* n, std::int64_t* k,
                                                       std::int64_t* lda, std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* lda,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* lda,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* lda,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*zpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*zpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* lda,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* nrhs,
                                                       std::int64_t* lda, std::int64_t* ldb,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* nrhs,
                                                       std::int64_t* lda, std::int64_t* ldb,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* nrhs,
                                                       std::int64_t* lda, std::int64_t* ldb,
                                                       std::int64_t group_count,
                                                       std::int64_t* group_sizes);
-    std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo,
+    std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo,
                                                       std::int64_t* n, std::int64_t* nrhs,
                                                       std::int64_t* lda, std::int64_t* ldb,
                                                       std::int64_t group_count,
diff --git a/src/lapack/lapack_loader.cpp b/src/lapack/lapack_loader.cpp
index f558cca09..410c04575 100644
--- a/src/lapack/lapack_loader.cpp
+++ b/src/lapack/lapack_loader.cpp
@@ -17,20 +17,20 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/lapack/detail/lapack_loader.hpp"
+#include "oneapi/math/lapack/detail/lapack_loader.hpp"
 
 #include "function_table_initializer.hpp"
 #include "lapack/function_table.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace lapack {
 namespace detail {
 
-static oneapi::mkl::detail::table_initializer<domain::lapack, lapack_function_table_t>
+static oneapi::math::detail::table_initializer<domain::lapack, lapack_function_table_t>
     function_tables;
 
-void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& d,
            sycl::buffer<float>& e, sycl::buffer<std::complex<float>>& tauq,
            sycl::buffer<std::complex<float>>& taup, sycl::buffer<std::complex<float>>& scratchpad,
@@ -38,21 +38,21 @@ void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::
     function_tables[{ libkey, queue }].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup,
                                                    scratchpad, scratchpad_size);
 }
-void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<double>& tauq, sycl::buffer<double>& taup,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup,
                                                    scratchpad, scratchpad_size);
 }
-void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
            sycl::buffer<float>& tauq, sycl::buffer<float>& taup, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup,
                                                    scratchpad, scratchpad_size);
 }
-void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tauq,
            sycl::buffer<std::complex<double>>& taup, sycl::buffer<std::complex<double>>& scratchpad,
@@ -60,109 +60,109 @@ void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::
     function_tables[{ libkey, queue }].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup,
                                                    scratchpad, scratchpad_size);
 }
-void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
+void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
+void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<float>>& b,
            std::int64_t ldb, sycl::buffer<std::complex<float>>& scratchpad,
@@ -170,21 +170,21 @@ void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpos
     function_tables[{ libkey, queue }].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                                    scratchpad, scratchpad_size);
 }
-void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<double>& b, std::int64_t ldb,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                                    scratchpad, scratchpad_size);
 }
-void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<float>& b, std::int64_t ldb,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                                    scratchpad, scratchpad_size);
 }
-void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
            std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
            std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
@@ -192,24 +192,24 @@ void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpos
     function_tables[{ libkey, queue }].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb,
                                                    scratchpad, scratchpad_size);
 }
-void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-           oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
+void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+           oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& s, sycl::buffer<double>& u, std::int64_t ldu,
            sycl::buffer<double>& vt, std::int64_t ldvt, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                                                    ldvt, scratchpad, scratchpad_size);
 }
-void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-           oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
+void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+           oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& s, sycl::buffer<float>& u, std::int64_t ldu,
            sycl::buffer<float>& vt, std::int64_t ldvt, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                                                    ldvt, scratchpad, scratchpad_size);
 }
-void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-           oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+           oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& s,
            sycl::buffer<std::complex<float>>& u, std::int64_t ldu,
            sycl::buffer<std::complex<float>>& vt, std::int64_t ldvt,
@@ -217,8 +217,8 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd j
     function_tables[{ libkey, queue }].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                                                    ldvt, scratchpad, scratchpad_size);
 }
-void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-           oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+           oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& s,
            sycl::buffer<std::complex<double>>& u, std::int64_t ldu,
            sycl::buffer<std::complex<double>>& vt, std::int64_t ldvt,
@@ -226,363 +226,363 @@ void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd j
     function_tables[{ libkey, queue }].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt,
                                                    ldvt, scratchpad, scratchpad_size);
 }
-void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-           oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<std::complex<float>>& a,
+void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+           oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<float>& w, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad,
                                                    scratchpad_size);
 }
-void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-           oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<std::complex<double>>& a,
+void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+           oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<std::complex<double>>& a,
            std::int64_t lda, sycl::buffer<double>& w,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad,
                                                    scratchpad_size);
 }
-void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+           oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb, sycl::buffer<float>& w,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                                                    scratchpad, scratchpad_size);
 }
-void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+           oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb, sycl::buffer<double>& w,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                                                    scratchpad, scratchpad_size);
 }
-void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<float>& d,
            sycl::buffer<float>& e, sycl::buffer<std::complex<float>>& tau,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad,
                                                    scratchpad_size);
 }
-void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<std::complex<double>>& tau,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad,
                                                    scratchpad_size);
 }
-void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+void orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
            std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+void orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
            std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c,
                                                    ldc, scratchpad, scratchpad_size);
 }
-void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& c, std::int64_t ldc, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c,
                                                    ldc, scratchpad, scratchpad_size);
 }
-void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& c, std::int64_t ldc, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& tau,
            sycl::buffer<double>& c, std::int64_t ldc, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& tau,
            sycl::buffer<float>& c, std::int64_t ldc, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].spotrf_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].spotri_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dpotri_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cpotri_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zpotri_sycl(queue, uplo, n, a, lda, scratchpad,
                                                    scratchpad_size);
 }
-void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
            std::int64_t ldb, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                                                    scratchpad_size);
 }
-void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
            std::int64_t ldb, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                                                    scratchpad_size);
 }
-void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                                                    scratchpad_size);
 }
-void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                                                    scratchpad_size);
 }
-void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-           oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
+void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+           oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a, std::int64_t lda,
            sycl::buffer<double>& w, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad,
                                                    scratchpad_size);
 }
-void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-           oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
+void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+           oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a, std::int64_t lda,
            sycl::buffer<float>& w, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad,
                                                    scratchpad_size);
 }
-void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
+void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+           oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<double>& a,
            std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb, sycl::buffer<double>& w,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                                                    scratchpad, scratchpad_size);
 }
-void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
+void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+           oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer<float>& a,
            std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb, sycl::buffer<float>& w,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w,
                                                    scratchpad, scratchpad_size);
 }
-void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& d,
            sycl::buffer<double>& e, sycl::buffer<double>& tau, sycl::buffer<double>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad,
                                                    scratchpad_size);
 }
-void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& d, sycl::buffer<float>& e,
            sycl::buffer<float>& tau, sycl::buffer<float>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad,
                                                    scratchpad_size);
 }
-void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda, sycl::buffer<std::int64_t>& ipiv,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::int64_t>& ipiv, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad,
                                                    scratchpad_size);
 }
-void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<float>>& a, std::int64_t lda,
+void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+           std::int64_t nrhs, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b,
                                                    ldb, scratchpad, scratchpad_size);
 }
-void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b, std::int64_t ldb,
-           sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
+void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+           std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda, sycl::buffer<double>& b,
+           std::int64_t ldb, sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b,
                                                    ldb, scratchpad, scratchpad_size);
 }
-void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b, std::int64_t ldb,
-           sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
+void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+           std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda, sycl::buffer<float>& b,
+           std::int64_t ldb, sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b,
                                                    ldb, scratchpad, scratchpad_size);
 }
-void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-           oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-           sycl::buffer<std::complex<double>>& a, std::int64_t lda,
+void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+           oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
+           std::int64_t nrhs, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b,
                                                    ldb, scratchpad, scratchpad_size);
 }
-void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+void ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
            std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<float>>& a,
            std::int64_t lda, sycl::buffer<std::complex<float>>& tau,
            sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+void ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
            std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer<std::complex<double>>& a,
            std::int64_t lda, sycl::buffer<std::complex<double>>& tau,
            sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
            std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& scratchpad,
            std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad,
                                                    scratchpad_size);
 }
-void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
@@ -590,8 +590,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
@@ -599,8 +599,8 @@ void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
@@ -608,8 +608,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
@@ -617,8 +617,8 @@ void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc,
                                                    scratchpad, scratchpad_size);
 }
-void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<float>>& a, std::int64_t lda,
            sycl::buffer<std::complex<float>>& tau, sycl::buffer<std::complex<float>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<float>>& scratchpad,
@@ -626,8 +626,8 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c,
                                                    ldc, scratchpad, scratchpad_size);
 }
-void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n,
+void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+           oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
            sycl::buffer<std::complex<double>>& a, std::int64_t lda,
            sycl::buffer<std::complex<double>>& tau, sycl::buffer<std::complex<double>>& c,
            std::int64_t ldc, sycl::buffer<std::complex<double>>& scratchpad,
@@ -635,7 +635,7 @@ void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side sid
     function_tables[{ libkey, queue }].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c,
                                                    ldc, scratchpad, scratchpad_size);
 }
-sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, float* d, float* e,
                   std::complex<float>* tauq, std::complex<float>* taup,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -643,21 +643,21 @@ sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m
     return function_tables[{ libkey, queue }].cgebrd_usm_sycl(
         queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   double* a, std::int64_t lda, double* d, double* e, double* tauq, double* taup,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgebrd_usm_sycl(
         queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   float* a, std::int64_t lda, float* d, float* e, float* tauq, float* taup,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgebrd_usm_sycl(
         queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, double* d, double* e,
                   std::complex<double>* tauq, std::complex<double>* taup,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -665,111 +665,111 @@ sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m
     return function_tables[{ libkey, queue }].zgebrd_usm_sycl(
         queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   float* a, std::int64_t lda, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   double* a, std::int64_t lda, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   double* a, std::int64_t lda, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   float* a, std::int64_t lda, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* a,
+sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* a,
                   std::int64_t lda, std::int64_t* ipiv, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* a,
+sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* a,
                   std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                   std::int64_t* ipiv, std::complex<float>* b, std::int64_t ldb,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -777,21 +777,21 @@ sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::t
     return function_tables[{ libkey, queue }].cgetrs_usm_sycl(
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
                   std::int64_t* ipiv, double* b, std::int64_t ldb, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgetrs_usm_sycl(
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv,
                   float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sgetrs_usm_sycl(
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                   std::int64_t n, std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                   std::int64_t* ipiv, std::complex<double>* b, std::int64_t ldb,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -799,8 +799,8 @@ sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::t
     return function_tables[{ libkey, queue }].zgetrs_usm_sycl(
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, double* a,
+sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, double* a,
                   std::int64_t lda, double* s, double* u, std::int64_t ldu, double* vt,
                   std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -808,8 +808,8 @@ sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::j
                                                               u, ldu, vt, ldvt, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
+sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a,
                   std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt,
                   std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -817,17 +817,18 @@ sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::j
                                                               u, ldu, vt, ldvt, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex<float>* a,
-                  std::int64_t lda, float* s, std::complex<float>* u, std::int64_t ldu,
-                  std::complex<float>* vt, std::int64_t ldvt, std::complex<float>* scratchpad,
-                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
+sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
+                  std::complex<float>* a, std::int64_t lda, float* s, std::complex<float>* u,
+                  std::int64_t ldu, std::complex<float>* vt, std::int64_t ldvt,
+                  std::complex<float>* scratchpad, std::int64_t scratchpad_size,
+                  const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s,
                                                               u, ldu, vt, ldvt, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu,
-                  oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n,
+sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu,
+                  oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, double* s, std::complex<double>* u,
                   std::int64_t ldu, std::complex<double>* vt, std::int64_t ldvt,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -836,104 +837,104 @@ sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::j
                                                               u, ldu, vt, ldvt, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
+sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   float* w, std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cheevd_usm_sycl(
         queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, std::complex<double>* a, std::int64_t lda,
-                  double* w, std::complex<double>* scratchpad, std::int64_t scratchpad_size,
-                  const std::vector<sycl::event>& dependencies) {
+sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, std::complex<double>* a,
+                  std::int64_t lda, double* w, std::complex<double>* scratchpad,
+                  std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zheevd_usm_sycl(
         queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* b,
                   std::int64_t ldb, float* w, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].chegvd_usm_sycl(
         queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n,
+sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* b,
                   std::int64_t ldb, double* w, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zhegvd_usm_sycl(
         queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda, float* d, float* e,
                   std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].chetrd_usm_sycl(
         queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda, double* d, double* e,
                   std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zhetrd_usm_sycl(
         queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].chetrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zhetrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                   float* tau, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sorgbr_usm_sycl(
         queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                   double* tau, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dorgbr_usm_sycl(
         queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dorgqr_usm_sycl(
         queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sorgqr_usm_sycl(
         queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, float* a, std::int64_t lda, float* tau, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].sorgtr_usm_sycl(
         queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, double* a, std::int64_t lda, double* tau, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dorgtr_usm_sycl(
         queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, float* a, std::int64_t lda, float* tau, float* c,
                   std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -941,8 +942,8 @@ sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               lda, tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, double* a, std::int64_t lda, double* tau, double* c,
                   std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -950,8 +951,8 @@ sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               lda, tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -959,8 +960,8 @@ sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -968,8 +969,8 @@ sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -977,8 +978,8 @@ sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -986,157 +987,157 @@ sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, float* a, std::int64_t lda, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, double* a, std::int64_t lda, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, float* a, std::int64_t lda, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, double* a, std::int64_t lda, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, float* b,
                   std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].spotrs_usm_sycl(
         queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, double* b,
                   std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dpotrs_usm_sycl(
         queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* b, std::int64_t ldb, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cpotrs_usm_sycl(
         queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* b, std::int64_t ldb, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zpotrs_usm_sycl(
         queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* w,
+sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* w,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dsyevd_usm_sycl(
         queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz,
-                  oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* w,
+sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz,
+                  oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* w,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].ssyevd_usm_sycl(
         queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double* a,
+sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, double* a,
                   std::int64_t lda, double* b, std::int64_t ldb, double* w, double* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dsygvd_usm_sycl(
         queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype,
-                  oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float* a,
+sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype,
+                  oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, float* a,
                   std::int64_t lda, float* b, std::int64_t ldb, float* w, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].ssygvd_usm_sycl(
         queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, double* a, std::int64_t lda, double* d, double* e, double* tau,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dsytrd_usm_sycl(
         queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, float* a, std::int64_t lda, float* d, float* e, float* tau,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].ssytrd_usm_sycl(
         queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].ssytrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dsytrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].csytrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda, std::int64_t* ipiv,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zsytrf_usm_sycl(
         queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                   std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* b, std::int64_t ldb, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1144,8 +1145,8 @@ sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::u
                                                               lda, b, ldb, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                   std::int64_t nrhs, double* a, std::int64_t lda, double* b, std::int64_t ldb,
                   double* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -1153,8 +1154,8 @@ sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::u
                                                               lda, b, ldb, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                   std::int64_t nrhs, float* a, std::int64_t lda, float* b, std::int64_t ldb,
                   float* scratchpad, std::int64_t scratchpad_size,
                   const std::vector<sycl::event>& dependencies) {
@@ -1162,8 +1163,8 @@ sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::u
                                                               lda, b, ldb, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
-                  oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n,
+sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+                  oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n,
                   std::int64_t nrhs, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* b, std::int64_t ldb, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1171,50 +1172,50 @@ sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::u
                                                               lda, b, ldb, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<float>* a,
                   std::int64_t lda, std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cungbr_usm_sycl(
         queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec,
+sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec,
                   std::int64_t m, std::int64_t n, std::int64_t k, std::complex<double>* a,
                   std::int64_t lda, std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zungbr_usm_sycl(
         queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::int64_t k, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cungqr_usm_sycl(
         queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                   std::int64_t k, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zungqr_usm_sycl(
         queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* tau, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].cungtr_usm_sycl(
         queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* tau, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].zungtr_usm_sycl(
         queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* c, std::int64_t ldc, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1222,8 +1223,8 @@ sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* c, std::int64_t ldc, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1231,8 +1232,8 @@ sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<float>* a, std::int64_t lda, std::complex<float>* tau,
                   std::complex<float>* c, std::int64_t ldc, std::complex<float>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1240,8 +1241,8 @@ sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
+sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k,
                   std::complex<double>* a, std::int64_t lda, std::complex<double>* tau,
                   std::complex<double>* c, std::int64_t ldc, std::complex<double>* scratchpad,
                   std::int64_t scratchpad_size, const std::vector<sycl::event>& dependencies) {
@@ -1249,8 +1250,8 @@ sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, std::complex<float>* a, std::int64_t lda,
                   std::complex<float>* tau, std::complex<float>* c, std::int64_t ldc,
                   std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1259,8 +1260,8 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               lda, tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side,
-                  oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m,
+sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+                  oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m,
                   std::int64_t n, std::complex<double>* a, std::int64_t lda,
                   std::complex<double>* tau, std::complex<double>* c, std::int64_t ldc,
                   std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1269,21 +1270,21 @@ sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::s
                                                               lda, tau, c, ldc, scratchpad,
                                                               scratchpad_size, dependencies);
 }
-void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<float>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sgeqrf_batch_sycl(
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<double>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dgeqrf_batch_sycl(
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<float>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
@@ -1291,7 +1292,7 @@ void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].cgeqrf_batch_sycl(
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::complex<double>>& tau, std::int64_t stride_tau,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
@@ -1299,7 +1300,7 @@ void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].zgeqrf_batch_sycl(
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
@@ -1307,7 +1308,7 @@ void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
     function_tables[{ libkey, queue }].sgetri_batch_sycl(
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
@@ -1315,7 +1316,7 @@ void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
     function_tables[{ libkey, queue }].dgetri_batch_sycl(
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
@@ -1323,7 +1324,7 @@ void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
     function_tables[{ libkey, queue }].cgetri_batch_sycl(
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
@@ -1331,7 +1332,7 @@ void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
     function_tables[{ libkey, queue }].zgetri_batch_sycl(
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<float>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1341,7 +1342,7 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::tr
                                                          ipiv, stride_ipiv, b, ldb, stride_b,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  sycl::buffer<double>& b, std::int64_t ldb, std::int64_t stride_b,
@@ -1351,7 +1352,7 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::tr
                                                          ipiv, stride_ipiv, b, ldb, stride_b,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, sycl::buffer<std::complex<float>>& b, std::int64_t ldb,
@@ -1361,7 +1362,7 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::tr
                                                          ipiv, stride_ipiv, b, ldb, stride_b,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans,
+void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::int64_t>& ipiv,
                  std::int64_t stride_ipiv, sycl::buffer<std::complex<double>>& b, std::int64_t ldb,
@@ -1371,7 +1372,7 @@ void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::tr
                                                          ipiv, stride_ipiv, b, ldb, stride_b,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
@@ -1379,7 +1380,7 @@ void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].sgetrf_batch_sycl(
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
@@ -1387,7 +1388,7 @@ void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].dgetrf_batch_sycl(
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<float>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<float>>& scratchpad,
@@ -1395,7 +1396,7 @@ void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].cgetrf_batch_sycl(
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  sycl::buffer<std::complex<double>>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<std::int64_t>& ipiv, std::int64_t stride_ipiv,
                  std::int64_t batch_size, sycl::buffer<std::complex<double>>& scratchpad,
@@ -1403,49 +1404,49 @@ void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].zgetrf_batch_sycl(
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
 }
-void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::int64_t k, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<float>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<float>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].sorgqr_batch_sycl(
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::int64_t k, sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  sycl::buffer<double>& tau, std::int64_t stride_tau, std::int64_t batch_size,
                  sycl::buffer<double>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dorgqr_batch_sycl(
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, sycl::buffer<float>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<float>& scratchpad,
                  std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, sycl::buffer<double>& a, std::int64_t lda, std::int64_t stride_a,
                  std::int64_t batch_size, sycl::buffer<double>& scratchpad,
                  std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                  std::int64_t stride_a, std::int64_t batch_size,
                  sycl::buffer<std::complex<float>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                  std::int64_t stride_a, std::int64_t batch_size,
                  sycl::buffer<std::complex<double>>& scratchpad, std::int64_t scratchpad_size) {
     function_tables[{ libkey, queue }].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a,
                                                          batch_size, scratchpad, scratchpad_size);
 }
-void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<float>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<float>& b, std::int64_t ldb,
                  std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<float>& scratchpad,
@@ -1454,7 +1455,7 @@ void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::up
                                                          ldb, stride_b, batch_size, scratchpad,
                                                          scratchpad_size);
 }
-void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<double>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<double>& b, std::int64_t ldb,
                  std::int64_t stride_b, std::int64_t batch_size, sycl::buffer<double>& scratchpad,
@@ -1463,7 +1464,7 @@ void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::up
                                                          ldb, stride_b, batch_size, scratchpad,
                                                          scratchpad_size);
 }
-void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<float>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<float>>& b,
                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -1472,7 +1473,7 @@ void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::up
                                                          ldb, stride_b, batch_size, scratchpad,
                                                          scratchpad_size);
 }
-void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                  std::int64_t n, std::int64_t nrhs, sycl::buffer<std::complex<double>>& a,
                  std::int64_t lda, std::int64_t stride_a, sycl::buffer<std::complex<double>>& b,
                  std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -1481,7 +1482,7 @@ void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::up
                                                          ldb, stride_b, batch_size, scratchpad,
                                                          scratchpad_size);
 }
-void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::int64_t k, sycl::buffer<std::complex<float>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::complex<float>>& tau,
                  std::int64_t stride_tau, std::int64_t batch_size,
@@ -1489,7 +1490,7 @@ void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].cungqr_batch_sycl(
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
                  std::int64_t k, sycl::buffer<std::complex<double>>& a, std::int64_t lda,
                  std::int64_t stride_a, sycl::buffer<std::complex<double>>& tau,
                  std::int64_t stride_tau, std::int64_t batch_size,
@@ -1497,7 +1498,7 @@ void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
     function_tables[{ libkey, queue }].zungqr_batch_sycl(
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a,
                         float* tau, std::int64_t stride_tau, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
@@ -1506,7 +1507,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a,
                         double* tau, std::int64_t stride_tau, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
@@ -1515,7 +1516,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<float>* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
@@ -1525,7 +1526,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<double>* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
@@ -1535,7 +1536,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size,
                         float* scratchpad, std::int64_t scratchpad_size,
@@ -1544,7 +1545,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size,
                         double* scratchpad, std::int64_t scratchpad_size,
@@ -1553,7 +1554,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
@@ -1563,7 +1564,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
@@ -1573,7 +1574,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* a,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1582,7 +1583,7 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* a,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1591,7 +1592,7 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1600,7 +1601,7 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n,
                         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1609,8 +1610,8 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float* a,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, float* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
@@ -1619,8 +1620,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, double* a,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, double* a,
                         std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv,
                         std::int64_t stride_ipiv, double* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size, double* scratchpad,
@@ -1630,8 +1631,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                         std::complex<float>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<float>* b,
                         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -1641,8 +1642,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs,
                         std::complex<double>* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex<double>* b,
                         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -1652,7 +1653,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size,
         scratchpad, scratchpad_size, dependencies);
 }
-sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::int64_t k, float* a, std::int64_t lda,
                         std::int64_t stride_a, float* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
@@ -1661,7 +1662,7 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::int64_t k, double* a, std::int64_t lda,
                         std::int64_t stride_a, double* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size,
@@ -1670,21 +1671,21 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].spotrf_batch_usm_sycl(
         queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a,
                         std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dpotrf_batch_usm_sycl(
         queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t batch_size,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1692,7 +1693,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
     return function_tables[{ libkey, queue }].cpotrf_batch_usm_sycl(
         queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::int64_t batch_size,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1700,7 +1701,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
     return function_tables[{ libkey, queue }].zpotrf_batch_usm_sycl(
         queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda,
                         std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size,
@@ -1709,7 +1710,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda,
                         std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b,
                         std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size,
@@ -1718,7 +1719,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::int64_t nrhs, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<float>* b, std::int64_t ldb,
                         std::int64_t stride_b, std::int64_t batch_size,
@@ -1728,7 +1729,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
                         std::int64_t n, std::int64_t nrhs, std::complex<double>* a,
                         std::int64_t lda, std::int64_t stride_a, std::complex<double>* b,
                         std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size,
@@ -1738,7 +1739,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::int64_t k, std::complex<float>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<float>* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, std::complex<float>* scratchpad,
@@ -1748,7 +1749,7 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m,
+sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m,
                         std::int64_t n, std::int64_t k, std::complex<double>* a, std::int64_t lda,
                         std::int64_t stride_a, std::complex<double>* tau, std::int64_t stride_tau,
                         std::int64_t batch_size, std::complex<double>* scratchpad,
@@ -1758,7 +1759,7 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, float** a, std::int64_t* lda, float** tau,
                         std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1767,7 +1768,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, double** a, std::int64_t* lda, double** tau,
                         std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1776,7 +1777,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
                         std::complex<float>** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
@@ -1786,7 +1787,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
                         std::complex<double>** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
@@ -1796,7 +1797,7 @@ sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, float** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1805,7 +1806,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, double** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1814,7 +1815,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1823,7 +1824,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
                         std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1832,7 +1833,7 @@ sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, float** a,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, float** a,
                         std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
@@ -1840,15 +1841,16 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, double** a,
-                        std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count,
-                        std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
+                        double** a, std::int64_t* lda, std::int64_t** ipiv,
+                        std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad,
+                        std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
     return function_tables[{ libkey, queue }].dgetri_group_usm_sycl(
         queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                         std::complex<float>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1857,7 +1859,7 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n,
+sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n,
                         std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1866,8 +1868,8 @@ sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
                         float** a, std::int64_t* lda, std::int64_t** ipiv, float** b,
                         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
                         float* scratchpad, std::int64_t scratchpad_size,
@@ -1876,8 +1878,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
                         double** a, std::int64_t* lda, std::int64_t** ipiv, double** b,
                         std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes,
                         double* scratchpad, std::int64_t scratchpad_size,
@@ -1886,8 +1888,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
                         std::complex<float>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::complex<float>** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
@@ -1897,8 +1899,8 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
-                        oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue,
+                        oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs,
                         std::complex<double>** a, std::int64_t* lda, std::int64_t** ipiv,
                         std::complex<double>** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
@@ -1908,7 +1910,7 @@ sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue,
         queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad,
         scratchpad_size, dependencies);
 }
-sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::int64_t* k, float** a, std::int64_t* lda, float** tau,
                         std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad,
                         std::int64_t scratchpad_size,
@@ -1917,7 +1919,7 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::int64_t* k, double** a, std::int64_t* lda,
                         double** tau, std::int64_t group_count, std::int64_t* group_sizes,
                         double* scratchpad, std::int64_t scratchpad_size,
@@ -1926,7 +1928,7 @@ sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, float** a, std::int64_t* lda, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
@@ -1934,7 +1936,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, double** a, std::int64_t* lda, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
                         const std::vector<sycl::event>& dependencies) {
@@ -1942,7 +1944,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::complex<float>** a, std::int64_t* lda,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<float>* scratchpad, std::int64_t scratchpad_size,
@@ -1951,7 +1953,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::complex<double>** a, std::int64_t* lda,
                         std::int64_t group_count, std::int64_t* group_sizes,
                         std::complex<double>* scratchpad, std::int64_t scratchpad_size,
@@ -1960,7 +1962,7 @@ sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::int64_t* nrhs, float** a, std::int64_t* lda,
                         float** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size,
@@ -1969,7 +1971,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::int64_t* nrhs, double** a, std::int64_t* lda,
                         double** b, std::int64_t* ldb, std::int64_t group_count,
                         std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size,
@@ -1978,7 +1980,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::int64_t* nrhs, std::complex<float>** a,
                         std::int64_t* lda, std::complex<float>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
@@ -1988,7 +1990,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo,
+sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo,
                         std::int64_t* n, std::int64_t* nrhs, std::complex<double>** a,
                         std::int64_t* lda, std::complex<double>** b, std::int64_t* ldb,
                         std::int64_t group_count, std::int64_t* group_sizes,
@@ -1998,7 +2000,7 @@ sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::
         queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::int64_t* k, std::complex<float>** a,
                         std::int64_t* lda, std::complex<float>** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<float>* scratchpad,
@@ -2008,7 +2010,7 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
         queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size,
         dependencies);
 }
-sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m,
+sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m,
                         std::int64_t* n, std::int64_t* k, std::complex<double>** a,
                         std::int64_t* lda, std::complex<double>** tau, std::int64_t group_count,
                         std::int64_t* group_sizes, std::complex<double>* scratchpad,
@@ -2020,555 +2022,560 @@ sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int
 }
 
 template <>
-std::int64_t gebrd_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t gebrd_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sgebrd_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gebrd_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t gebrd_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dgebrd_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gebrd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t gebrd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cgebrd_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gebrd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t gebrd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].zgebrd_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gerqf_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t gerqf_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sgerqf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gerqf_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t gerqf_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dgerqf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gerqf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t gerqf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cgerqf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gerqf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t gerqf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].zgerqf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t geqrf_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sgeqrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t geqrf_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dgeqrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t geqrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t geqrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cgeqrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t geqrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t geqrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].zgeqrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t gesvd_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+std::int64_t gesvd_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                           std::int64_t m, std::int64_t n, std::int64_t lda,
                                           std::int64_t ldu, std::int64_t ldvt) {
     return function_tables[{ libkey, queue }].sgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n,
                                                                           lda, ldu, ldvt);
 }
 template <>
-std::int64_t gesvd_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+std::int64_t gesvd_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                                            std::int64_t m, std::int64_t n, std::int64_t lda,
                                            std::int64_t ldu, std::int64_t ldvt) {
     return function_tables[{ libkey, queue }].dgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n,
                                                                           lda, ldu, ldvt);
 }
 template <>
-std::int64_t gesvd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t gesvd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue,
-                                                        oneapi::mkl::jobsvd jobu,
-                                                        oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                                                        oneapi::math::jobsvd jobu,
+                                                        oneapi::math::jobsvd jobvt, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda,
                                                         std::int64_t ldu, std::int64_t ldvt) {
     return function_tables[{ libkey, queue }].cgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n,
                                                                           lda, ldu, ldvt);
 }
 template <>
-std::int64_t gesvd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t gesvd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue,
-                                                         oneapi::mkl::jobsvd jobu,
-                                                         oneapi::mkl::jobsvd jobvt, std::int64_t m,
+                                                         oneapi::math::jobsvd jobu,
+                                                         oneapi::math::jobsvd jobvt, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda,
                                                          std::int64_t ldu, std::int64_t ldvt) {
     return function_tables[{ libkey, queue }].zgesvd_scratchpad_size_sycl(queue, jobu, jobvt, m, n,
                                                                           lda, ldu, ldvt);
 }
 template <>
-std::int64_t getrf_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sgetrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t getrf_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t m, std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dgetrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t getrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t getrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cgetrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t getrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t getrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].zgetrf_scratchpad_size_sycl(queue, m, n, lda);
 }
 template <>
-std::int64_t getri_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sgetri_scratchpad_size_sycl(queue, n, lda);
 }
 template <>
-std::int64_t getri_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dgetri_scratchpad_size_sycl(queue, n, lda);
 }
 template <>
-std::int64_t getri_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t getri_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t n,
                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].cgetri_scratchpad_size_sycl(queue, n, lda);
 }
 template <>
-std::int64_t getri_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t getri_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t n,
                                                          std::int64_t lda) {
     return function_tables[{ libkey, queue }].zgetri_scratchpad_size_sycl(queue, n, lda);
 }
 template <>
-std::int64_t getrs_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::transpose trans, std::int64_t n,
+std::int64_t getrs_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::transpose trans, std::int64_t n,
                                           std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs,
                                                                           lda, ldb);
 }
 template <>
-std::int64_t getrs_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::transpose trans, std::int64_t n,
+std::int64_t getrs_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::transpose trans, std::int64_t n,
                                            std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs,
                                                                           lda, ldb);
 }
 template <>
-std::int64_t getrs_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t getrs_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue,
-                                                        oneapi::mkl::transpose trans,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t n, std::int64_t nrhs,
                                                         std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].cgetrs_scratchpad_size_sycl(queue, trans, n, nrhs,
                                                                           lda, ldb);
 }
 template <>
-std::int64_t getrs_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t getrs_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue,
-                                                         oneapi::mkl::transpose trans,
+                                                         oneapi::math::transpose trans,
                                                          std::int64_t n, std::int64_t nrhs,
                                                          std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].zgetrs_scratchpad_size_sycl(queue, trans, n, nrhs,
                                                                           lda, ldb);
 }
 template <>
-std::int64_t heevd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t heevd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::job jobz,
+                                                        oneapi::math::uplo uplo, std::int64_t n,
                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n,
                                                                           lda);
 }
 template <>
-std::int64_t heevd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t heevd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue, oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
                                                          std::int64_t lda) {
     return function_tables[{ libkey, queue }].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n,
                                                                           lda);
 }
 template <>
-std::int64_t hegvd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t hegvd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t itype,
-                                                        oneapi::mkl::job jobz,
-                                                        oneapi::mkl::uplo uplo, std::int64_t n,
+                                                        oneapi::math::job jobz,
+                                                        oneapi::math::uplo uplo, std::int64_t n,
                                                         std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].chegvd_scratchpad_size_sycl(queue, itype, jobz, uplo,
                                                                           n, lda, ldb);
 }
 template <>
-std::int64_t hegvd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t hegvd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t itype,
-                                                         oneapi::mkl::job jobz,
-                                                         oneapi::mkl::uplo uplo, std::int64_t n,
+                                                         oneapi::math::job jobz,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
                                                          std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].zhegvd_scratchpad_size_sycl(queue, itype, jobz, uplo,
                                                                           n, lda, ldb);
 }
 template <>
-std::int64_t hetrd_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrd_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].chetrd_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t hetrd_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t hetrd_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t hetrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].chetrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t hetrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t hetrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t orgbr_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::generate vect, std::int64_t m,
+std::int64_t orgbr_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::generate vect, std::int64_t m,
                                           std::int64_t n, std::int64_t k, std::int64_t lda) {
     return function_tables[{ libkey, queue }].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k,
                                                                           lda);
 }
 template <>
-std::int64_t orgbr_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::generate vect, std::int64_t m,
+std::int64_t orgbr_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::generate vect, std::int64_t m,
                                            std::int64_t n, std::int64_t k, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k,
                                                                           lda);
 }
 template <>
-std::int64_t orgtr_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t orgtr_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t orgtr_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t orgtr_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t orgqr_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                           std::int64_t m, std::int64_t n, std::int64_t k,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda);
 }
 template <>
-std::int64_t orgqr_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                            std::int64_t m, std::int64_t n, std::int64_t k,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda);
 }
 template <>
-std::int64_t ormrq_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormrq_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::side side, oneapi::math::transpose trans,
                                           std::int64_t m, std::int64_t n, std::int64_t k,
                                           std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].sormrq_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t ormrq_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormrq_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::side side, oneapi::math::transpose trans,
                                            std::int64_t m, std::int64_t n, std::int64_t k,
                                            std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].dormrq_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t ormqr_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormqr_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::side side, oneapi::math::transpose trans,
                                           std::int64_t m, std::int64_t n, std::int64_t k,
                                           std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].sormqr_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t ormqr_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::side side, oneapi::mkl::transpose trans,
+std::int64_t ormqr_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::side side, oneapi::math::transpose trans,
                                            std::int64_t m, std::int64_t n, std::int64_t k,
                                            std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].dormqr_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t ormtr_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                          oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormtr_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::side side, oneapi::math::uplo uplo,
+                                          oneapi::math::transpose trans, std::int64_t m,
                                           std::int64_t n, std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].sormtr_scratchpad_size_sycl(queue, side, uplo, trans,
                                                                           m, n, lda, ldc);
 }
 template <>
-std::int64_t ormtr_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::side side, oneapi::mkl::uplo uplo,
-                                           oneapi::mkl::transpose trans, std::int64_t m,
+std::int64_t ormtr_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::side side, oneapi::math::uplo uplo,
+                                           oneapi::math::transpose trans, std::int64_t m,
                                            std::int64_t n, std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].dormtr_scratchpad_size_sycl(queue, side, uplo, trans,
                                                                           m, n, lda, ldc);
 }
 template <>
-std::int64_t potrf_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrf_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].spotrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potrf_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrf_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t potrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potrs_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs,
-                                          std::int64_t lda, std::int64_t ldb) {
+std::int64_t potrs_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
+                                          std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda,
                                                                           ldb);
 }
 template <>
-std::int64_t potrs_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrs_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda,
                                                                           ldb);
 }
 template <>
-std::int64_t potrs_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potrs_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t nrhs,
                                                         std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda,
                                                                           ldb);
 }
 template <>
-std::int64_t potrs_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t nrhs,
-                                                         std::int64_t lda, std::int64_t ldb) {
+std::int64_t potrs_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t nrhs, std::int64_t lda,
+                                                         std::int64_t ldb) {
     return function_tables[{ libkey, queue }].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda,
                                                                           ldb);
 }
 template <>
-std::int64_t potri_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potri_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].spotri_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potri_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potri_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dpotri_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potri_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t potri_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cpotri_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t potri_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t potri_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zpotri_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t sytrf_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t sytrf_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t sytrf_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t sytrf_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t sytrf_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t sytrf_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].csytrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t sytrf_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t sytrf_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t syevd_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+std::int64_t syevd_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::job jobz, oneapi::math::uplo uplo,
                                           std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n,
                                                                           lda);
 }
 template <>
-std::int64_t syevd_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+std::int64_t syevd_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::job jobz, oneapi::math::uplo uplo,
                                            std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n,
                                                                           lda);
 }
 template <>
-std::int64_t sygvd_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          std::int64_t itype, oneapi::mkl::job jobz,
-                                          oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
+std::int64_t sygvd_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          std::int64_t itype, oneapi::math::job jobz,
+                                          oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda,
                                           std::int64_t ldb) {
     return function_tables[{ libkey, queue }].ssygvd_scratchpad_size_sycl(queue, itype, jobz, uplo,
                                                                           n, lda, ldb);
 }
 template <>
-std::int64_t sygvd_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           std::int64_t itype, oneapi::mkl::job jobz,
-                                           oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda,
-                                           std::int64_t ldb) {
+std::int64_t sygvd_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           std::int64_t itype, oneapi::math::job jobz,
+                                           oneapi::math::uplo uplo, std::int64_t n,
+                                           std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo,
                                                                           n, lda, ldb);
 }
 template <>
-std::int64_t sytrd_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t sytrd_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, std::int64_t n,
                                           std::int64_t lda) {
     return function_tables[{ libkey, queue }].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t sytrd_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t sytrd_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, std::int64_t n,
                                            std::int64_t lda) {
     return function_tables[{ libkey, queue }].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t trtrs_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                          oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                          oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs,
-                                          std::int64_t lda, std::int64_t ldb) {
+std::int64_t trtrs_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                          oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                          oneapi::math::diag diag, std::int64_t n,
+                                          std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag,
                                                                           n, nrhs, lda, ldb);
 }
 template <>
-std::int64_t trtrs_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                           oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                                           oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                           oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                                           oneapi::math::diag diag, std::int64_t n,
                                            std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].dtrtrs_scratchpad_size_sycl(queue, uplo, trans, diag,
                                                                           n, nrhs, lda, ldb);
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
-                                                        oneapi::mkl::diag diag, std::int64_t n,
+std::int64_t trtrs_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
+                                                        oneapi::math::transpose trans,
+                                                        oneapi::math::diag diag, std::int64_t n,
                                                         std::int64_t nrhs, std::int64_t lda,
                                                         std::int64_t ldb) {
     return function_tables[{ libkey, queue }].ctrtrs_scratchpad_size_sycl(queue, uplo, trans, diag,
                                                                           n, nrhs, lda, ldb);
 }
 template <>
-std::int64_t trtrs_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         oneapi::mkl::diag diag, std::int64_t n,
-                                                         std::int64_t nrhs, std::int64_t lda,
-                                                         std::int64_t ldb) {
+std::int64_t trtrs_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo,
+    oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs,
+    std::int64_t lda, std::int64_t ldb) {
     return function_tables[{ libkey, queue }].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag,
                                                                           n, nrhs, lda, ldb);
 }
 template <>
-std::int64_t ungbr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t ungbr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue,
-                                                        oneapi::mkl::generate vect, std::int64_t m,
+                                                        oneapi::math::generate vect, std::int64_t m,
                                                         std::int64_t n, std::int64_t k,
                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].cungbr_scratchpad_size_sycl(queue, vect, m, n, k,
                                                                           lda);
 }
 template <>
-std::int64_t ungbr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t ungbr_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue,
-                                                         oneapi::mkl::generate vect, std::int64_t m,
-                                                         std::int64_t n, std::int64_t k,
-                                                         std::int64_t lda) {
+                                                         oneapi::math::generate vect,
+                                                         std::int64_t m, std::int64_t n,
+                                                         std::int64_t k, std::int64_t lda) {
     return function_tables[{ libkey, queue }].zungbr_scratchpad_size_sycl(queue, vect, m, n, k,
                                                                           lda);
 }
 template <>
-std::int64_t ungqr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t ungqr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                         sycl::queue& queue, std::int64_t m,
                                                         std::int64_t n, std::int64_t k,
                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].cungqr_scratchpad_size_sycl(queue, m, n, k, lda);
 }
 template <>
-std::int64_t ungqr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t ungqr_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                          sycl::queue& queue, std::int64_t m,
                                                          std::int64_t n, std::int64_t k,
                                                          std::int64_t lda) {
     return function_tables[{ libkey, queue }].zungqr_scratchpad_size_sycl(queue, m, n, k, lda);
 }
 template <>
-std::int64_t ungtr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::uplo uplo,
+std::int64_t ungtr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::uplo uplo,
                                                         std::int64_t n, std::int64_t lda) {
     return function_tables[{ libkey, queue }].cungtr_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t ungtr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::uplo uplo,
-                                                         std::int64_t n, std::int64_t lda) {
+std::int64_t ungtr_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
+                                                         sycl::queue& queue,
+                                                         oneapi::math::uplo uplo, std::int64_t n,
+                                                         std::int64_t lda) {
     return function_tables[{ libkey, queue }].zungtr_scratchpad_size_sycl(queue, uplo, n, lda);
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmrq_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
@@ -2576,19 +2583,17 @@ std::int64_t unmrq_scratchpad_size<std::complex<float>>(oneapi::mkl::device libk
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t unmrq_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
+std::int64_t unmrq_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc) {
     return function_tables[{ libkey, queue }].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t unmqr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmqr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t k, std::int64_t lda,
                                                         std::int64_t ldc) {
@@ -2596,37 +2601,33 @@ std::int64_t unmqr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libk
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t unmqr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t k, std::int64_t lda,
-                                                         std::int64_t ldc) {
+std::int64_t unmqr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda,
+    std::int64_t ldc) {
     return function_tables[{ libkey, queue }].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n,
                                                                           k, lda, ldc);
 }
 template <>
-std::int64_t unmtr_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
-                                                        sycl::queue& queue, oneapi::mkl::side side,
-                                                        oneapi::mkl::uplo uplo,
-                                                        oneapi::mkl::transpose trans,
+std::int64_t unmtr_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
+                                                        sycl::queue& queue, oneapi::math::side side,
+                                                        oneapi::math::uplo uplo,
+                                                        oneapi::math::transpose trans,
                                                         std::int64_t m, std::int64_t n,
                                                         std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].cunmtr_scratchpad_size_sycl(queue, side, uplo, trans,
                                                                           m, n, lda, ldc);
 }
 template <>
-std::int64_t unmtr_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
-                                                         sycl::queue& queue, oneapi::mkl::side side,
-                                                         oneapi::mkl::uplo uplo,
-                                                         oneapi::mkl::transpose trans,
-                                                         std::int64_t m, std::int64_t n,
-                                                         std::int64_t lda, std::int64_t ldc) {
+std::int64_t unmtr_scratchpad_size<std::complex<double>>(
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side,
+    oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n,
+    std::int64_t lda, std::int64_t ldc) {
     return function_tables[{ libkey, queue }].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans,
                                                                           m, n, lda, ldc);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t m, std::int64_t n, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_ipiv,
                                                 std::int64_t batch_size) {
@@ -2634,7 +2635,7 @@ std::int64_t getrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t m, std::int64_t n, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t stride_ipiv,
                                                  std::int64_t batch_size) {
@@ -2643,20 +2644,20 @@ std::int64_t getrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cgetrf_batch_scratchpad_size_sycl(
         queue, m, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
 std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zgetrf_batch_scratchpad_size_sycl(
         queue, m, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t n, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_ipiv,
                                                 std::int64_t batch_size) {
@@ -2664,7 +2665,7 @@ std::int64_t getri_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t n, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t stride_ipiv,
                                                  std::int64_t batch_size) {
@@ -2673,21 +2674,21 @@ std::int64_t getri_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
     std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cgetri_batch_scratchpad_size_sycl(
         queue, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
 std::int64_t getri_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda,
     std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zgetri_batch_scratchpad_size_sycl(
         queue, n, lda, stride_a, stride_ipiv, batch_size);
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::transpose trans, std::int64_t n,
+std::int64_t getrs_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::transpose trans, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_ipiv,
                                                 std::int64_t ldb, std::int64_t stride_b,
@@ -2696,8 +2697,8 @@ std::int64_t getrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::transpose trans, std::int64_t n,
+std::int64_t getrs_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::transpose trans, std::int64_t n,
                                                  std::int64_t nrhs, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t stride_ipiv,
                                                  std::int64_t ldb, std::int64_t stride_b,
@@ -2707,7 +2708,7 @@ std::int64_t getrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
     std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cgetrs_batch_scratchpad_size_sycl(
@@ -2715,14 +2716,14 @@ std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n,
     std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv,
     std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zgetrs_batch_scratchpad_size_sycl(
         queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t m, std::int64_t n, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t stride_tau,
                                                 std::int64_t batch_size) {
@@ -2730,7 +2731,7 @@ std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, lda, stride_a, stride_tau, batch_size);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t m, std::int64_t n, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t stride_tau,
                                                  std::int64_t batch_size) {
@@ -2739,29 +2740,29 @@ std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cgeqrf_batch_scratchpad_size_sycl(
         queue, m, n, lda, stride_a, stride_tau, batch_size);
 }
 template <>
 std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zgeqrf_batch_scratchpad_size_sycl(
         queue, m, n, lda, stride_a, stride_tau, batch_size);
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t lda, std::int64_t stride_a,
                                                 std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].spotrf_batch_scratchpad_size_sycl(
         queue, uplo, n, lda, stride_a, batch_size);
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::uplo uplo, std::int64_t n,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].dpotrf_batch_scratchpad_size_sycl(
@@ -2769,21 +2770,21 @@ std::int64_t potrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cpotrf_batch_scratchpad_size_sycl(
         queue, uplo, n, lda, stride_a, batch_size);
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
     std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zpotrf_batch_scratchpad_size_sycl(
         queue, uplo, n, lda, stride_a, batch_size);
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrs_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::uplo uplo, std::int64_t n,
                                                 std::int64_t nrhs, std::int64_t lda,
                                                 std::int64_t stride_a, std::int64_t ldb,
                                                 std::int64_t stride_b, std::int64_t batch_size) {
@@ -2791,8 +2792,8 @@ std::int64_t potrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::uplo uplo, std::int64_t n,
+std::int64_t potrs_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::uplo uplo, std::int64_t n,
                                                  std::int64_t nrhs, std::int64_t lda,
                                                  std::int64_t stride_a, std::int64_t ldb,
                                                  std::int64_t stride_b, std::int64_t batch_size) {
@@ -2801,7 +2802,7 @@ std::int64_t potrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
     std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cpotrs_batch_scratchpad_size_sycl(
@@ -2809,14 +2810,14 @@ std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n,
     std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb,
     std::int64_t stride_b, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zpotrs_batch_scratchpad_size_sycl(
         queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 }
 template <>
-std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t m, std::int64_t n, std::int64_t k,
                                                 std::int64_t lda, std::int64_t stride_a,
                                                 std::int64_t stride_tau, std::int64_t batch_size) {
@@ -2824,7 +2825,7 @@ std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <>
-std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t m, std::int64_t n, std::int64_t k,
                                                  std::int64_t lda, std::int64_t stride_a,
                                                  std::int64_t stride_tau, std::int64_t batch_size) {
@@ -2833,20 +2834,20 @@ std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].cungqr_batch_scratchpad_size_sycl(
         queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k,
     std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) {
     return function_tables[{ libkey, queue }].zungqr_batch_scratchpad_size_sycl(
         queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -2854,7 +2855,7 @@ std::int64_t getrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t* m, std::int64_t* n,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -2862,7 +2863,7 @@ std::int64_t getrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                               sycl::queue& queue, std::int64_t* m,
                                                               std::int64_t* n, std::int64_t* lda,
                                                               std::int64_t group_count,
@@ -2871,7 +2872,7 @@ std::int64_t getrf_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::devic
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                                sycl::queue& queue, std::int64_t* m,
                                                                std::int64_t* n, std::int64_t* lda,
                                                                std::int64_t group_count,
@@ -2880,7 +2881,7 @@ std::int64_t getrf_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::devi
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -2888,7 +2889,7 @@ std::int64_t getri_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t getri_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t* n, std::int64_t* lda,
                                                  std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -2896,7 +2897,7 @@ std::int64_t getri_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
         queue, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t getri_batch_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                               sycl::queue& queue, std::int64_t* n,
                                                               std::int64_t* lda,
                                                               std::int64_t group_count,
@@ -2905,7 +2906,7 @@ std::int64_t getri_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::devic
         queue, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getri_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t getri_batch_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                                sycl::queue& queue, std::int64_t* n,
                                                                std::int64_t* lda,
                                                                std::int64_t group_count,
@@ -2914,8 +2915,8 @@ std::int64_t getri_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::devi
         queue, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::transpose* trans, std::int64_t* n,
+std::int64_t getrs_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::transpose* trans, std::int64_t* n,
                                                 std::int64_t* nrhs, std::int64_t* lda,
                                                 std::int64_t* ldb, std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -2923,8 +2924,8 @@ std::int64_t getrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <>
-std::int64_t getrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::transpose* trans, std::int64_t* n,
+std::int64_t getrs_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::transpose* trans, std::int64_t* n,
                                                  std::int64_t* nrhs, std::int64_t* lda,
                                                  std::int64_t* ldb, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -2933,22 +2934,22 @@ std::int64_t getrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes) {
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].cgetrs_group_scratchpad_size_sycl(
         queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <>
 std::int64_t getrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n,
-    std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
-    std::int64_t* group_sizes) {
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans,
+    std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb,
+    std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].zgetrs_group_scratchpad_size_sycl(
         queue, trans, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t* m, std::int64_t* n, std::int64_t* lda,
                                                 std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -2956,7 +2957,7 @@ std::int64_t geqrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t* m, std::int64_t* n,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -2964,7 +2965,7 @@ std::int64_t geqrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::device libkey,
+std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(oneapi::math::device libkey,
                                                               sycl::queue& queue, std::int64_t* m,
                                                               std::int64_t* n, std::int64_t* lda,
                                                               std::int64_t group_count,
@@ -2973,7 +2974,7 @@ std::int64_t geqrf_batch_scratchpad_size<std::complex<float>>(oneapi::mkl::devic
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::device libkey,
+std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(oneapi::math::device libkey,
                                                                sycl::queue& queue, std::int64_t* m,
                                                                std::int64_t* n, std::int64_t* lda,
                                                                std::int64_t group_count,
@@ -2982,7 +2983,7 @@ std::int64_t geqrf_batch_scratchpad_size<std::complex<double>>(oneapi::mkl::devi
         queue, m, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
                                                 std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                                 std::int64_t* lda, std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -2990,7 +2991,7 @@ std::int64_t orgqr_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, m, n, k, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
+std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
                                                  std::int64_t* m, std::int64_t* n, std::int64_t* k,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -2998,16 +2999,16 @@ std::int64_t orgqr_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
         queue, m, n, k, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrf_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::uplo* uplo, std::int64_t* n,
                                                 std::int64_t* lda, std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].spotrf_group_scratchpad_size_sycl(
         queue, uplo, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t potrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrf_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::uplo* uplo, std::int64_t* n,
                                                  std::int64_t* lda, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].dpotrf_group_scratchpad_size_sycl(
@@ -3015,21 +3016,21 @@ std::int64_t potrf_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
     std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].cpotrf_group_scratchpad_size_sycl(
         queue, uplo, n, lda, group_count, group_sizes);
 }
 template <>
 std::int64_t potrf_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
     std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].zpotrf_group_scratchpad_size_sycl(
         queue, uplo, n, lda, group_count, group_sizes);
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrs_batch_scratchpad_size<float>(oneapi::math::device libkey, sycl::queue& queue,
+                                                oneapi::math::uplo* uplo, std::int64_t* n,
                                                 std::int64_t* nrhs, std::int64_t* lda,
                                                 std::int64_t* ldb, std::int64_t group_count,
                                                 std::int64_t* group_sizes) {
@@ -3037,8 +3038,8 @@ std::int64_t potrs_batch_scratchpad_size<float>(oneapi::mkl::device libkey, sycl
         queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes);
 }
 template <>
-std::int64_t potrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, sycl::queue& queue,
-                                                 oneapi::mkl::uplo* uplo, std::int64_t* n,
+std::int64_t potrs_batch_scratchpad_size<double>(oneapi::math::device libkey, sycl::queue& queue,
+                                                 oneapi::math::uplo* uplo, std::int64_t* n,
                                                  std::int64_t* nrhs, std::int64_t* lda,
                                                  std::int64_t* ldb, std::int64_t group_count,
                                                  std::int64_t* group_sizes) {
@@ -3047,7 +3048,7 @@ std::int64_t potrs_batch_scratchpad_size<double>(oneapi::mkl::device libkey, syc
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
     std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
     std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].cpotrs_group_scratchpad_size_sycl(
@@ -3055,7 +3056,7 @@ std::int64_t potrs_batch_scratchpad_size<std::complex<float>>(
 }
 template <>
 std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n,
     std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count,
     std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].zpotrs_group_scratchpad_size_sycl(
@@ -3063,14 +3064,14 @@ std::int64_t potrs_batch_scratchpad_size<std::complex<double>>(
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<float>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
     std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].cungqr_group_scratchpad_size_sycl(
         queue, m, n, k, lda, group_count, group_sizes);
 }
 template <>
 std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
-    oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
+    oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n,
     std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) {
     return function_tables[{ libkey, queue }].zungqr_group_scratchpad_size_sycl(
         queue, m, n, k, lda, group_count, group_sizes);
@@ -3078,5 +3079,5 @@ std::int64_t ungqr_batch_scratchpad_size<std::complex<double>>(
 
 } //namespace detail
 } //namespace lapack
-} //namespace mkl
+} //namespace math
 } //namespace oneapi
diff --git a/src/rng/CMakeLists.txt b/src/rng/CMakeLists.txt
index 30df39403..fc36a67c4 100644
--- a/src/rng/CMakeLists.txt
+++ b/src/rng/CMakeLists.txt
@@ -22,26 +22,27 @@ add_subdirectory(backends)
 
 # Recipe for RNG loader object
 if(BUILD_SHARED_LIBS)
-add_library(onemkl_rng OBJECT)
-target_sources(onemkl_rng PRIVATE rng_loader.cpp)
-target_include_directories(onemkl_rng
+add_library(onemath_rng OBJECT)
+add_deprecated_library(onemath_rng)
+target_sources(onemath_rng PRIVATE rng_loader.cpp)
+target_include_directories(onemath_rng
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
-          $<TARGET_FILE_DIR:onemkl>
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
+          $<TARGET_FILE_DIR:onemath>
 )
 
-target_compile_options(onemkl_rng PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(onemath_rng PRIVATE ${ONEMATH_BUILD_COPT})
 
-set_target_properties(onemkl_rng PROPERTIES
+set_target_properties(onemath_rng PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
-  add_sycl_to_target(TARGET onemkl_rng SOURCES rng_loader.cpp)
+  add_sycl_to_target(TARGET onemath_rng SOURCES rng_loader.cpp)
 else()
-  target_link_libraries(onemkl_rng PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(onemath_rng PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
 endif()
diff --git a/src/rng/backends/CMakeLists.txt b/src/rng/backends/CMakeLists.txt
index 9045f7e75..52ddcdd3c 100644
--- a/src/rng/backends/CMakeLists.txt
+++ b/src/rng/backends/CMakeLists.txt
@@ -17,8 +17,8 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-add_custom_target(onemkl_backend_libs_rng)
-add_dependencies(onemkl_backend_libs onemkl_backend_libs_rng)
+add_custom_target(onemath_backend_libs_rng)
+add_dependencies(onemath_backend_libs onemath_backend_libs_rng)
 
 if(ENABLE_MKLCPU_BACKEND)
   add_subdirectory(mklcpu)
diff --git a/src/rng/backends/curand/CMakeLists.txt b/src/rng/backends/curand/CMakeLists.txt
index f37a34f1d..566db6422 100644
--- a/src/rng/backends/curand/CMakeLists.txt
+++ b/src/rng/backends/curand/CMakeLists.txt
@@ -56,27 +56,27 @@
 # so.
 #=================================================================================
 
-set(LIB_NAME onemkl_rng_curand)
+set(LIB_NAME onemath_rng_curand)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(cuRAND REQUIRED)
 
 set(SOURCES philox4x32x10.cpp
   mrg32k3a.cpp
-  $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_rng_curand_wrappers.cpp>)
+  $<$<BOOL:${BUILD_SHARED_LIBS}>: curand_wrappers.cpp>)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_rng ${LIB_NAME})
+add_dependencies(onemath_backend_libs_rng ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${MKL_INCLUDE}
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ONEMKL::cuRAND::cuRAND)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL ONEMATH::cuRAND::cuRAND)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_11)
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
@@ -95,8 +95,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/rng/backends/curand/curand_helper.hpp b/src/rng/backends/curand/curand_helper.hpp
index 3926e6283..41f085862 100644
--- a/src/rng/backends/curand/curand_helper.hpp
+++ b/src/rng/backends/curand/curand_helper.hpp
@@ -60,17 +60,17 @@
  * @file curand_helper.cpp : contains the implementation of all the routines
  * for CUDA backend
  */
-#ifndef _MKL_RNG_CURAND_HELPER_HPP_
-#define _MKL_RNG_CURAND_HELPER_HPP_
+#ifndef ONEMATH_RNG_CURAND_HELPER_HPP_
+#define ONEMATH_RNG_CURAND_HELPER_HPP_
 #include <cuda.h>
 #include <curand.h>
 
 #include <complex>
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace curand {
 
@@ -190,7 +190,7 @@ class cuda_error : virtual public std::runtime_error {
         throw curand_error(std::string(#func) + std::string(" : "), status); \
     }
 
-// Static template functions oneapi::mkl::rng::curand::range_transform_fp for
+// Static template functions oneapi::math::rng::curand::range_transform_fp for
 // Buffer and USM APIs
 //
 // cuRAND has no built-in functionality to specify a custom range for sampling
@@ -252,7 +252,7 @@ static inline sycl::event range_transform_fp_accurate(sycl::queue& queue, T a, T
     });
 }
 
-// Static template functions oneapi::mkl::rng::curand::range_transform_int for
+// Static template functions oneapi::math::rng::curand::range_transform_int for
 // Buffer and USM APIs
 //
 // cuRAND has no built-in functionality to specify a custom range for sampling
@@ -285,7 +285,7 @@ inline sycl::event range_transform_int(sycl::queue& queue, T a, T b, std::int64_
                               [=](sycl::id<1> id) { out[id] = a + in[id] % (b - a); });
 }
 
-// Static template functions oneapi::mkl::rng::curand::sample_bernoulli for
+// Static template functions oneapi::math::rng::curand::sample_bernoulli for
 // Buffer and USM APIs
 //
 // cuRAND has no built-in functionality to sample from a Bernoulli distribution.
@@ -320,7 +320,7 @@ static inline sycl::event sample_bernoulli_from_uniform(sycl::queue& queue, floa
 
 } // namespace curand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _MKL_RNG_CURAND_HELPER_HPP_
+#endif // ONEMATH_RNG_CURAND_HELPER_HPP_
diff --git a/src/rng/backends/curand/curand_task.hpp b/src/rng/backends/curand/curand_task.hpp
index 0cd46b203..4bd9c812a 100644
--- a/src/rng/backends/curand/curand_task.hpp
+++ b/src/rng/backends/curand/curand_task.hpp
@@ -1,5 +1,5 @@
-#ifndef _MKL_RNG_CURAND_TASK_HPP_
-#define _MKL_RNG_CURAND_TASK_HPP_
+#ifndef ONEMATH_RNG_CURAND_TASK_HPP_
+#define ONEMATH_RNG_CURAND_TASK_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -10,7 +10,7 @@
 #include "curand_helper.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace curand {
 #ifdef __HIPSYCL__
@@ -73,18 +73,18 @@ static inline void host_task_internal(H& cgh, E e, F f) {
 }
 #endif
 template <typename H, typename A, typename E, typename F>
-static inline void onemkl_curand_host_task(H& cgh, A acc, E e, F f) {
+static inline void onemath_curand_host_task(H& cgh, A acc, E e, F f) {
     host_task_internal(cgh, acc, e, f);
 }
 
 template <typename H, typename Engine, typename F>
-static inline void onemkl_curand_host_task(H& cgh, Engine e, F f) {
+static inline void onemath_curand_host_task(H& cgh, Engine e, F f) {
     host_task_internal(cgh, e, f);
 }
 
 } // namespace curand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif
diff --git a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp b/src/rng/backends/curand/curand_wrappers.cpp
similarity index 90%
rename from src/rng/backends/curand/mkl_rng_curand_wrappers.cpp
rename to src/rng/backends/curand/curand_wrappers.cpp
index 393433c81..8ebc8225e 100644
--- a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp
+++ b/src/rng/backends/curand/curand_wrappers.cpp
@@ -56,13 +56,13 @@
  * so.
  ******************************************************************************/
 
-#include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp"
+#include "oneapi/math/rng/detail/curand/onemath_rng_curand.hpp"
 #include "rng/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = {
-    WRAPPER_VERSION, oneapi::mkl::rng::curand::create_philox4x32x10,
-    oneapi::mkl::rng::curand::create_philox4x32x10, oneapi::mkl::rng::curand::create_mrg32k3a,
-    oneapi::mkl::rng::curand::create_mrg32k3a
+extern "C" ONEMATH_EXPORT rng_function_table_t onemath_rng_table = {
+    WRAPPER_VERSION, oneapi::math::rng::curand::create_philox4x32x10,
+    oneapi::math::rng::curand::create_philox4x32x10, oneapi::math::rng::curand::create_mrg32k3a,
+    oneapi::math::rng::curand::create_mrg32k3a
 };
diff --git a/src/rng/backends/curand/mrg32k3a.cpp b/src/rng/backends/curand/mrg32k3a.cpp
index dd44f4def..14ce97adc 100644
--- a/src/rng/backends/curand/mrg32k3a.cpp
+++ b/src/rng/backends/curand/mrg32k3a.cpp
@@ -74,45 +74,45 @@
 
 #include "curand_helper.hpp"
 #include "curand_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/engines.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/curand/onemath_rng_curand.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/engines.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace curand {
 
 #if !defined(_WIN64)
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         curandStatus_t status;
         CURAND_CALL(curandCreateGenerator, status, &engine_, CURAND_RNG_PSEUDO_MRG32K3A);
         CURAND_CALL(curandSetPseudoRandomGeneratorSeed, status, engine_, (unsigned long long)seed);
     }
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine",
-                                         "multi-seed unsupported by cuRAND backend");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32k3a engine",
+                                          "multi-seed unsupported by cuRAND backend");
     }
 
-    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine",
-                                         "copy construction unsupported by cuRAND backend");
+    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "mrg32k3a engine",
+                                          "copy construction unsupported by cuRAND backend");
     }
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r_ptr, n);
             });
@@ -120,12 +120,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r_ptr, n);
             });
@@ -133,13 +133,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
         sycl::buffer<std::uint32_t, 1> ib(n);
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = ib.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r_ptr, n);
             });
@@ -148,11 +148,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r_ptr, n);
             });
@@ -160,12 +160,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<float>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r_ptr, n);
             });
@@ -173,12 +173,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<double>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormal, status, engine_, r_ptr, n, distr.mean(),
                             distr.stddev());
@@ -186,12 +186,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormalDouble, status, engine_, r_ptr, n, distr.mean(),
                             distr.stddev());
@@ -200,27 +200,27 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormal, status, engine_, r_ptr, n, distr.m(),
                             distr.s());
@@ -228,12 +228,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormalDouble, status, engine_, r_ptr, n, distr.m(),
                             distr.s());
@@ -242,45 +242,45 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
@@ -289,7 +289,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
                           sycl::buffer<std::uint32_t, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.template get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r_ptr, n);
             });
@@ -299,11 +299,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r, n);
             });
@@ -312,11 +312,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r, n);
             });
@@ -325,7 +326,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         auto usm_deleter = [this](std::uint32_t* ptr) {
@@ -337,7 +338,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         sycl::event::wait_and_throw(dependencies);
 
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, ib, n);
             });
@@ -348,11 +349,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r, n);
             });
@@ -362,11 +363,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r, n);
             });
@@ -375,12 +377,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormal, status, engine_, r, n, distr.mean(),
                             distr.stddev());
@@ -389,12 +391,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormalDouble, status, engine_, r, n, distr.mean(),
                             distr.stddev());
@@ -403,30 +405,30 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormal, status, engine_, r, n, distr.m(), distr.s());
             });
@@ -434,12 +436,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormalDouble, status, engine_, r, n, distr.m(),
                             distr.s());
@@ -448,18 +450,19 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -468,7 +471,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -477,7 +480,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -486,7 +489,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -495,7 +498,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -505,14 +508,14 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
                                  const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r, n);
             });
         });
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new mrg32k3a_impl(this);
     }
 
@@ -522,12 +525,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "skip_ahead",
-                                         "initializer list unsupported by cuRAND backend");
+        throw oneapi::math::unimplemented("rng", "skip_ahead",
+                                          "initializer list unsupported by cuRAND backend");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog", "unsupported by cuRAND backend");
+        throw oneapi::math::unimplemented("rng", "leapfrog", "unsupported by cuRAND backend");
     }
 
     virtual ~mrg32k3a_impl() override {
@@ -539,290 +542,293 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     std::uint32_t seed_;
 };
 #else // cuRAND backend is currently not supported on Windows
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
                           sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return nullptr;
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual ~mrg32k3a_impl() override {}
 };
 #endif
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                       std::initializer_list<std::uint32_t> seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                        std::initializer_list<std::uint32_t> seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
 } // namespace curand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/curand/philox4x32x10.cpp b/src/rng/backends/curand/philox4x32x10.cpp
index c3d4393d2..7f2c829ec 100644
--- a/src/rng/backends/curand/philox4x32x10.cpp
+++ b/src/rng/backends/curand/philox4x32x10.cpp
@@ -72,15 +72,15 @@
 #endif
 #include <iostream>
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-// #include "oneapi/mkl/rng/engines.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+// #include "oneapi/math/rng/engines.hpp"
 #include "curand_helper.hpp"
 #include "curand_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/curand/onemath_rng_curand.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace curand {
 
@@ -103,39 +103,39 @@ namespace curand {
  * consumed in other kernels without requiring the random numbers to be written
  * to, and read from, global memory.
  *
- * Here we utilize the host API since this is most aligned with how oneMKL
+ * Here we utilize the host API since this is most aligned with how oneMath
  * generates random numbers.
  *
  */
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         curandStatus_t status;
         CURAND_CALL(curandCreateGenerator, status, &engine_, CURAND_RNG_PSEUDO_PHILOX4_32_10);
         CURAND_CALL(curandSetPseudoRandomGeneratorSeed, status, engine_, (unsigned long long)seed);
     }
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine",
-                                         "multi-seed unsupported by cuRAND backend");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine",
+                                          "multi-seed unsupported by cuRAND backend");
     }
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine",
-                                         "copy construction unsupported by cuRAND backend");
+            : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine",
+                                          "copy construction unsupported by cuRAND backend");
     }
 
     // Buffers API
 
     virtual inline void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r_ptr, n);
             });
@@ -143,12 +143,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r_ptr, n);
             });
@@ -156,13 +156,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
         sycl::buffer<std::uint32_t, 1> ib(n);
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = ib.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r_ptr, n);
             });
@@ -171,11 +171,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r_ptr, n);
             });
@@ -183,12 +183,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r_ptr, n);
             });
@@ -196,12 +196,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormal, status, engine_, r_ptr, n, distr.mean(),
                             distr.stddev());
@@ -209,12 +209,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormalDouble, status, engine_, r_ptr, n, distr.mean(),
                             distr.stddev());
@@ -223,27 +223,27 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormal, status, engine_, r_ptr, n, distr.m(),
                             distr.s());
@@ -251,12 +251,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormalDouble, status, engine_, r_ptr, n, distr.m(),
                             distr.s());
@@ -265,45 +265,45 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
     }
@@ -312,7 +312,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
                           sycl::buffer<std::uint32_t, 1>& r) override {
         queue_.submit([&](sycl::handler& cgh) {
             auto acc = r.template get_access<sycl::access::mode::read_write>(cgh);
-            onemkl_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+            onemath_curand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r_ptr, n);
             });
@@ -322,11 +322,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r, n);
             });
@@ -335,11 +335,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r, n);
             });
@@ -348,7 +349,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         auto usm_deleter = [this](std::uint32_t* ptr) {
@@ -360,7 +361,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         sycl::event::wait_and_throw(dependencies);
 
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, ib, n);
             });
@@ -371,11 +372,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniform, status, engine_, r, n);
             });
@@ -384,11 +385,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateUniformDouble, status, engine_, r, n);
             });
@@ -397,12 +399,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormal, status, engine_, r, n, distr.mean(),
                             distr.stddev());
@@ -411,12 +413,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateNormalDouble, status, engine_, r, n, distr.mean(),
                             distr.stddev());
@@ -425,30 +427,30 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormal, status, engine_, r, n, distr.m(), distr.s());
             });
@@ -456,12 +458,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerateLogNormalDouble, status, engine_, r, n, distr.m(),
                             distr.s());
@@ -470,18 +472,19 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -490,7 +493,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -499,7 +502,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -508,7 +511,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -517,7 +520,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "ICDF method not used for pseudorandom generators in cuRAND backend");
         return sycl::event{};
@@ -527,14 +530,14 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
                                  const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         return queue_.submit([&](sycl::handler& cgh) {
-            onemkl_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_curand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 curandStatus_t status;
                 CURAND_CALL(curandGenerate, status, engine_, r, n);
             });
         });
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new philox4x32x10_impl(this);
     }
 
@@ -544,12 +547,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "skip_ahead",
-                                         "initializer list unsupported by cuRAND backend");
+        throw oneapi::math::unimplemented("rng", "skip_ahead",
+                                          "initializer list unsupported by cuRAND backend");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog", "unsupported by cuRAND backend");
+        throw oneapi::math::unimplemented("rng", "leapfrog", "unsupported by cuRAND backend");
     }
 
     virtual ~philox4x32x10_impl() override {
@@ -560,291 +563,295 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     curandGenerator_t engine_;
 };
 #else // cuRAND backend is currently not supported on Windows
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
                           sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return nullptr;
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual ~philox4x32x10_impl() override {}
 };
 #endif
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) {
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                             std::uint64_t seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
 } // namespace curand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/mklcpu/CMakeLists.txt b/src/rng/backends/mklcpu/CMakeLists.txt
index e72ce048f..edc122a6e 100644
--- a/src/rng/backends/mklcpu/CMakeLists.txt
+++ b/src/rng/backends/mklcpu/CMakeLists.txt
@@ -17,7 +17,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_rng_mklcpu)
+set(LIB_NAME onemath_rng_mklcpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 set(SOURCES cpu_common.hpp
@@ -27,30 +27,32 @@ set(SOURCES cpu_common.hpp
 )
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_rng ${LIB_NAME})
+add_dependencies(onemath_backend_libs_rng ${LIB_NAME})
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES})
 endif()
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL)
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-# Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -63,8 +65,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/rng/backends/mklcpu/cpu_common.hpp b/src/rng/backends/mklcpu/cpu_common.hpp
index a65338c91..559f27960 100644
--- a/src/rng/backends/mklcpu/cpu_common.hpp
+++ b/src/rng/backends/mklcpu/cpu_common.hpp
@@ -27,7 +27,7 @@
 #endif
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklcpu {
 
@@ -68,7 +68,7 @@ typename Acc::value_type* get_raw_ptr(Acc acc) {
 
 } // namespace mklcpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif //_RNG_CPU_COMMON_HPP_
diff --git a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp
index 840205db7..dce8d4830 100644
--- a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp
+++ b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp
@@ -18,12 +18,12 @@
 *******************************************************************************/
 
 #include "rng/function_table.hpp"
-#include "oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp"
+#include "oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = {
-    WRAPPER_VERSION, oneapi::mkl::rng::mklcpu::create_philox4x32x10,
-    oneapi::mkl::rng::mklcpu::create_philox4x32x10, oneapi::mkl::rng::mklcpu::create_mrg32k3a,
-    oneapi::mkl::rng::mklcpu::create_mrg32k3a
+extern "C" ONEMATH_EXPORT rng_function_table_t onemath_rng_table = {
+    WRAPPER_VERSION, oneapi::math::rng::mklcpu::create_philox4x32x10,
+    oneapi::math::rng::mklcpu::create_philox4x32x10, oneapi::math::rng::mklcpu::create_mrg32k3a,
+    oneapi::math::rng::mklcpu::create_mrg32k3a
 };
diff --git a/src/rng/backends/mklcpu/mrg32k3a.cpp b/src/rng/backends/mklcpu/mrg32k3a.cpp
index cc234de45..3bd79c400 100644
--- a/src/rng/backends/mklcpu/mrg32k3a.cpp
+++ b/src/rng/backends/mklcpu/mrg32k3a.cpp
@@ -24,35 +24,36 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "mkl_vsl.h"
+// Intel(R) oneMKL header
+#include <mkl_vsl.h>
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp"
 
 #include "cpu_common.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklcpu {
 
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         vslNewStream(&stream_, VSL_BRNG_MRG32K3A, seed);
         state_size_ = vslGetStreamSize(stream_);
     }
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         vslNewStreamEx(&stream_, VSL_BRNG_MRG32K3A, 2 * seed.size(),
                        reinterpret_cast<const std::uint32_t*>(seed.begin()));
         state_size_ = vslGetStreamSize(stream_);
     }
 
-    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) {
+    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other) {
         vslCopyStream(&stream_, other->stream_);
         state_size_ = vslGetStreamSize(stream_);
     }
@@ -545,7 +546,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new mrg32k3a_impl(this);
     }
 
@@ -558,7 +559,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog");
+        throw oneapi::math::unimplemented("rng", "leapfrog");
     }
 
     virtual ~mrg32k3a_impl() override {
@@ -570,16 +571,16 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     std::int32_t state_size_;
 };
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                       std::initializer_list<std::uint32_t> seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                        std::initializer_list<std::uint32_t> seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
 } // namespace mklcpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/mklcpu/philox4x32x10.cpp b/src/rng/backends/mklcpu/philox4x32x10.cpp
index 3f8e5e89b..144ced995 100644
--- a/src/rng/backends/mklcpu/philox4x32x10.cpp
+++ b/src/rng/backends/mklcpu/philox4x32x10.cpp
@@ -24,37 +24,38 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "mkl_vsl.h"
+// Intel(R) oneMKL header
+#include <mkl_vsl.h>
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp"
 
 #include "cpu_common.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklcpu {
 
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         vslNewStreamEx(&stream_, VSL_BRNG_PHILOX4X32X10, 2,
                        reinterpret_cast<std::uint32_t*>(&seed));
         state_size_ = vslGetStreamSize(stream_);
     }
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
+            : oneapi::math::rng::detail::engine_impl(queue) {
         vslNewStreamEx(&stream_, VSL_BRNG_PHILOX4X32X10, 2 * seed.size(),
                        reinterpret_cast<const std::uint32_t*>(seed.begin()));
         state_size_ = vslGetStreamSize(stream_);
     }
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other) {
+            : oneapi::math::rng::detail::engine_impl(*other) {
         vslCopyStream(&stream_, other->stream_);
         state_size_ = vslGetStreamSize(stream_);
     }
@@ -547,7 +548,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         });
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new philox4x32x10_impl(this);
     }
 
@@ -560,7 +561,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog");
+        throw oneapi::math::unimplemented("rng", "leapfrog");
     }
 
     virtual ~philox4x32x10_impl() override {
@@ -572,16 +573,17 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     std::int32_t state_size_;
 };
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) {
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                             std::uint64_t seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
 } // namespace mklcpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/mklgpu/CMakeLists.txt b/src/rng/backends/mklgpu/CMakeLists.txt
index 150f90136..3e628b8e0 100644
--- a/src/rng/backends/mklgpu/CMakeLists.txt
+++ b/src/rng/backends/mklgpu/CMakeLists.txt
@@ -17,42 +17,44 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_rng_mklgpu)
+set(LIB_NAME onemath_rng_mklgpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
-  mkl_internal_rng_gpu.hpp
   philox4x32x10.cpp
   mrg32k3a.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_rng_gpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_rng ${LIB_NAME})
+add_dependencies(onemath_backend_libs_rng ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::RNG)
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_SYCL::RNG)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_SYCL::RNG)
 else()
-  target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL MKL::MKL_DPCPP)
+  target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL MKL::MKL_DPCPP)
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-# Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -65,8 +67,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp b/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp
deleted file mode 100755
index 5ca480ba5..000000000
--- a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions
-* and limitations under the License.
-*
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
-
-#ifndef _MKL_INTERNAL_RNG_GPU_HPP_
-#define _MKL_INTERNAL_RNG_GPU_HPP_
-
-#if __has_include(<sycl/sycl.hpp>)
-#include <sycl/sycl.hpp>
-#else
-#include <CL/sycl.hpp>
-#endif
-
-namespace oneapi {
-namespace mkl {
-namespace rng {
-namespace detail {
-
-template <typename EngineType>
-class engine_base_impl;
-
-namespace gpu {
-
-template <typename EngineType>
-engine_base_impl<EngineType>* create_engine(sycl::queue& queue, std::uint64_t seed);
-
-template <typename EngineType>
-engine_base_impl<EngineType>* create_engine(sycl::queue& queue, std::int64_t n,
-                                            const unsigned int* seed_ptr);
-
-template <typename EngineType>
-engine_base_impl<EngineType>* create_engine(sycl::queue& queue,
-                                            engine_base_impl<EngineType>* other_impl);
-
-template <typename EngineType>
-void skip_ahead(sycl::queue& queue, engine_base_impl<EngineType>* impl, std::uint64_t num_to_skip);
-
-template <typename EngineType>
-void skip_ahead(sycl::queue& queue, engine_base_impl<EngineType>* impl,
-                std::initializer_list<std::uint64_t> num_to_skip);
-
-template <typename EngineType>
-void leapfrog(sycl::queue& queue, engine_base_impl<EngineType>* impl, std::uint64_t idx,
-              std::uint64_t stride);
-
-template <typename EngineType>
-void delete_engine(sycl::queue& queue, engine_base_impl<EngineType>* impl);
-
-template <typename EngineType, typename DistrType>
-sycl::event generate(sycl::queue& queue, const DistrType& distr,
-                     engine_base_impl<EngineType>* engine, std::int64_t n,
-                     sycl::buffer<typename DistrType::result_type, 1>& r);
-
-template <typename EngineType, typename DistrType>
-sycl::event generate(sycl::queue& queue, const DistrType& distr,
-                     engine_base_impl<EngineType>* engine, std::int64_t n,
-                     typename DistrType::result_type* r,
-                     const std::vector<sycl::event>& dependencies = {});
-
-} // namespace gpu
-} // namespace detail
-} // namespace rng
-} // namespace mkl
-} // namespace oneapi
-
-#endif //_MKL_INTERNAL_RNG_GPU_HPP_
diff --git a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp
index 6754b2749..678b6397b 100644
--- a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp
+++ b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp
@@ -18,12 +18,12 @@
 *******************************************************************************/
 
 #include "rng/function_table.hpp"
-#include "oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp"
+#include "oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = {
-    WRAPPER_VERSION, oneapi::mkl::rng::mklgpu::create_philox4x32x10,
-    oneapi::mkl::rng::mklgpu::create_philox4x32x10, oneapi::mkl::rng::mklgpu::create_mrg32k3a,
-    oneapi::mkl::rng::mklgpu::create_mrg32k3a
+extern "C" ONEMATH_EXPORT rng_function_table_t onemath_rng_table = {
+    WRAPPER_VERSION, oneapi::math::rng::mklgpu::create_philox4x32x10,
+    oneapi::math::rng::mklgpu::create_philox4x32x10, oneapi::math::rng::mklgpu::create_mrg32k3a,
+    oneapi::math::rng::mklgpu::create_mrg32k3a
 };
diff --git a/src/rng/backends/mklgpu/mrg32k3a.cpp b/src/rng/backends/mklgpu/mrg32k3a.cpp
index 05d24e1a4..c9a670c30 100644
--- a/src/rng/backends/mklgpu/mrg32k3a.cpp
+++ b/src/rng/backends/mklgpu/mrg32k3a.cpp
@@ -24,294 +24,329 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "mkl_version.h"
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
+#include <mkl/rng.hpp>
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/engines.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/engines.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp"
 
-#include "mkl_internal_rng_gpu.hpp"
+#include "onemkl_distribution_conversion.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklgpu {
 
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        engine_ =
-            oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::mrg32k3a>(queue, seed);
-    }
+            : oneapi::math::rng::detail::engine_impl(queue),
+              engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) {}
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        engine_ = oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::mrg32k3a>(
-            queue, (std::int64_t)(seed.size()), (const unsigned int*)seed.begin());
-    }
+            : oneapi::math::rng::detail::engine_impl(queue),
+              engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) {}
 
-    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) {
-        sycl::queue queue(other->queue_);
-        engine_ = oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::mrg32k3a>(
-            queue, other->engine_);
-    }
+    mrg32k3a_impl(const mrg32k3a_impl* other)
+            : oneapi::math::rng::detail::engine_impl(*other),
+              engine_((oneapi::mkl::rng::mrg32k3a(other->engine_))) {}
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
-                          sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const bits<std::uint32_t>& /*distr*/, std::int64_t /*n*/,
+                          sycl::buffer<std::uint32_t, 1>& /*r*/) override {
+        throw unimplemented("rng/mklgpu", "mrg32k3a::generate",
+                            "bits distribution is not supported");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         ;
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
-    virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
-                                 const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+    virtual sycl::event generate(const bits<std::uint32_t>& /*distr*/, std::int64_t /*n*/,
+                                 std::uint32_t* /*r*/,
+                                 const std::vector<sycl::event>& /*dependencies*/) override {
+        throw unimplemented("rng/mklgpu", "mrg32k3a::generate",
+                            "bits distribution is not supported");
+        return {};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new mrg32k3a_impl(this);
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        oneapi::mkl::rng::detail::gpu::skip_ahead(queue_, engine_, num_to_skip);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::skip_ahead(engine_, num_to_skip));
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        oneapi::mkl::rng::detail::gpu::skip_ahead(queue_, engine_, num_to_skip);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::skip_ahead(engine_, num_to_skip));
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog");
+        throw oneapi::math::unimplemented("rng", "leapfrog");
     }
 
-    virtual ~mrg32k3a_impl() override {
-        oneapi::mkl::rng::detail::gpu::delete_engine(queue_, engine_);
-    }
+    virtual ~mrg32k3a_impl() override {}
 
 private:
-    oneapi::mkl::rng::detail::engine_base_impl<oneapi::mkl::rng::mrg32k3a>* engine_;
+    oneapi::mkl::rng::mrg32k3a engine_;
 };
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                       std::initializer_list<std::uint32_t> seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                        std::initializer_list<std::uint32_t> seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
 } // namespace mklgpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp b/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp
new file mode 100644
index 000000000..31ecc6796
--- /dev/null
+++ b/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp
@@ -0,0 +1,147 @@
+/*******************************************************************************
+* Copyright Codeplay Software Ltd
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions
+* and limitations under the License.
+*
+*
+* SPDX-License-Identifier: Apache-2.0
+*******************************************************************************/
+
+#ifndef _ONEMATH_SRC_RNG_ONEMKL_DISTRIBUTION_CONVERSION_HPP_
+#define _ONEMATH_SRC_RNG_ONEMKL_DISTRIBUTION_CONVERSION_HPP_
+
+// Convert oneMath RNG distribution types to Intel(R) oneMKL equivalents
+
+#include <mkl/rng.hpp>
+
+#include "common_onemkl_conversion.hpp"
+#include "oneapi/math/rng/distributions.hpp"
+
+namespace oneapi {
+namespace math {
+namespace rng {
+namespace detail {
+
+// Maps a oneMath method tag to the Intel(R) oneMKL tag of the same name through
+// the nested `type` alias. The primary template is only declared in this
+// header, so a method tag with no specialization has no `type` to name.
+template <class Method>
+struct convert_method_t;
+
+template <>
+struct convert_method_t<uniform_method::standard> {
+    using type = oneapi::mkl::rng::uniform_method::standard;
+};
+
+template <>
+struct convert_method_t<uniform_method::accurate> {
+    using type = oneapi::mkl::rng::uniform_method::accurate;
+};
+
+template <>
+struct convert_method_t<gaussian_method::icdf> {
+    using type = oneapi::mkl::rng::gaussian_method::icdf;
+};
+
+template <>
+struct convert_method_t<gaussian_method::box_muller2> {
+    using type = oneapi::mkl::rng::gaussian_method::box_muller2;
+};
+
+template <>
+struct convert_method_t<lognormal_method::icdf> {
+    using type = oneapi::mkl::rng::lognormal_method::icdf;
+};
+
+template <>
+struct convert_method_t<lognormal_method::box_muller2> {
+    using type = oneapi::mkl::rng::lognormal_method::box_muller2;
+};
+
+template <>
+struct convert_method_t<bernoulli_method::icdf> {
+    using type = oneapi::mkl::rng::bernoulli_method::icdf;
+};
+
+template <>
+struct convert_method_t<poisson_method::gaussian_icdf_based> {
+    using type = oneapi::mkl::rng::poisson_method::gaussian_icdf_based;
+};
+
+// Function object that rebuilds a oneMath distribution as the matching
+// Intel(R) oneMKL distribution from the parameters read through its accessors.
+// Only the distributions specialized below are convertible; the primary
+// template is only declared in this header.
+template <class DistributionT>
+struct convert_distrib_t;
+
+// uniform: forwards the a() and b() parameters.
+template <class T, class Method>
+struct convert_distrib_t<uniform<T, Method>> {
+    auto operator()(const uniform<T, Method>& distribution) const {
+        using onemkl_method_t = typename convert_method_t<Method>::type;
+        return oneapi::mkl::rng::uniform<T, onemkl_method_t>(distribution.a(), distribution.b());
+    }
+};
+
+// gaussian: forwards mean() and stddev().
+template <class T, class Method>
+struct convert_distrib_t<gaussian<T, Method>> {
+    auto operator()(const gaussian<T, Method>& distribution) const {
+        using onemkl_method_t = typename convert_method_t<Method>::type;
+        return oneapi::mkl::rng::gaussian<T, onemkl_method_t>(distribution.mean(),
+                                                              distribution.stddev());
+    }
+};
+
+// lognormal: forwards m(), s(), displ() and scale(), in that order.
+template <class T, class Method>
+struct convert_distrib_t<lognormal<T, Method>> {
+    auto operator()(const lognormal<T, Method>& distribution) const {
+        using onemkl_method_t = typename convert_method_t<Method>::type;
+        return oneapi::mkl::rng::lognormal<T, onemkl_method_t>(
+            distribution.m(), distribution.s(), distribution.displ(), distribution.scale());
+    }
+};
+
+// bernoulli: forwards p().
+template <class T, class Method>
+struct convert_distrib_t<bernoulli<T, Method>> {
+    auto operator()(const bernoulli<T, Method>& distribution) const {
+        using onemkl_method_t = typename convert_method_t<Method>::type;
+        return oneapi::mkl::rng::bernoulli<T, onemkl_method_t>(distribution.p());
+    }
+};
+
+// poisson: forwards lambda().
+template <class T, class Method>
+struct convert_distrib_t<poisson<T, Method>> {
+    auto operator()(const poisson<T, Method>& distribution) const {
+        using onemkl_method_t = typename convert_method_t<Method>::type;
+        return oneapi::mkl::rng::poisson<T, onemkl_method_t>(distribution.lambda());
+    }
+};
+
+// Entry point used by the engine wrappers: deduces the oneMath distribution
+// type from the argument and returns the converted Intel(R) oneMKL object.
+template <class DistributionT>
+inline auto get_onemkl_distribution(const DistributionT& distribution) {
+    return convert_distrib_t<DistributionT>()(distribution);
+}
+
+} // namespace detail
+} // namespace rng
+} // namespace math
+} // namespace oneapi
+
+#endif // _ONEMATH_SRC_RNG_ONEMKL_DISTRIBUTION_CONVERSION_HPP_
diff --git a/src/rng/backends/mklgpu/philox4x32x10.cpp b/src/rng/backends/mklgpu/philox4x32x10.cpp
index bcf869c61..3d9263605 100644
--- a/src/rng/backends/mklgpu/philox4x32x10.cpp
+++ b/src/rng/backends/mklgpu/philox4x32x10.cpp
@@ -24,295 +24,330 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "mkl_version.h"
+// Intel(R) oneMKL headers
+#include <mkl_version.h>
+#include <mkl/rng.hpp>
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/engines.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/engines.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp"
 
-#include "mkl_internal_rng_gpu.hpp"
+#include "onemkl_distribution_conversion.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace mklgpu {
 
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        engine_ = oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::philox4x32x10>(
-            queue, seed);
-    }
+            : oneapi::math::rng::detail::engine_impl(queue),
+              engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) {}
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        engine_ = oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::philox4x32x10>(
-            queue, (std::int64_t)(seed.size() * 2), (const unsigned int*)seed.begin());
-    }
+            : oneapi::math::rng::detail::engine_impl(queue),
+              engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) {}
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other) {
-        sycl::queue queue(other->queue_);
-        engine_ = oneapi::mkl::rng::detail::gpu::create_engine<oneapi::mkl::rng::philox4x32x10>(
-            queue, other->engine_);
-    }
+            : oneapi::math::rng::detail::engine_impl(*other),
+              engine_((oneapi::mkl::rng::philox4x32x10(other->engine_))) {}
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r));
     }
 
-    virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
-                          sycl::buffer<std::uint32_t, 1>& r) override {
-        oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r);
+    virtual void generate(const bits<std::uint32_t>& /*distr*/, std::int64_t /*n*/,
+                          sycl::buffer<std::uint32_t, 1>& /*r*/) override {
+        throw unimplemented("rng/mklgpu", "philox4x32x10::generate",
+                            "bits distribution is not supported");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         ;
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(
+            detail::get_onemkl_distribution(distr), engine_, n, r, dependencies));
     }
 
-    virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
-                                 const std::vector<sycl::event>& dependencies) override {
-        return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies);
+    virtual sycl::event generate(const bits<std::uint32_t>& /*distr*/, std::int64_t /*n*/,
+                                 std::uint32_t* /*r*/,
+                                 const std::vector<sycl::event>& /*dependencies*/) override {
+        throw unimplemented("rng/mklgpu", "philox4x32x10::generate",
+                            "bits distribution is not supported");
+        return {};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new philox4x32x10_impl(this);
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        oneapi::mkl::rng::detail::gpu::skip_ahead(queue_, engine_, num_to_skip);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::skip_ahead(engine_, num_to_skip));
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        oneapi::mkl::rng::detail::gpu::skip_ahead(queue_, engine_, num_to_skip);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::skip_ahead(engine_, num_to_skip));
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog");
+        throw oneapi::math::unimplemented("rng", "leapfrog");
     }
 
-    virtual ~philox4x32x10_impl() override {
-        oneapi::mkl::rng::detail::gpu::delete_engine(queue_, engine_);
-    }
+    virtual ~philox4x32x10_impl() override {}
 
 private:
-    oneapi::mkl::rng::detail::engine_base_impl<oneapi::mkl::rng::philox4x32x10>* engine_;
+    oneapi::mkl::rng::philox4x32x10 engine_;
 };
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) {
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                             std::uint64_t seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
 } // namespace mklgpu
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/rocrand/CMakeLists.txt b/src/rng/backends/rocrand/CMakeLists.txt
index 47929703b..44dc98a51 100644
--- a/src/rng/backends/rocrand/CMakeLists.txt
+++ b/src/rng/backends/rocrand/CMakeLists.txt
@@ -52,25 +52,29 @@
 # perform publicly and display publicly, and to permit others to do so.
 # =================================================================================
 
-set(LIB_NAME onemkl_rng_rocrand)
+set(LIB_NAME onemath_rng_rocrand)
 set(LIB_OBJ ${LIB_NAME}_obj)
 find_package(hip REQUIRED)
 find_package(rocrand REQUIRED)
 find_package(Threads REQUIRED)
 
 set(SOURCES philox4x32x10.cpp mrg32k3a.cpp $<$<BOOL:${BUILD_SHARED_LIBS}>:
-            mkl_rng_rocrand_wrappers.cpp>)
+            rocrand_wrappers.cpp>)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT ${SOURCES})
-add_dependencies(onemkl_backend_libs_rng ${LIB_NAME})
+add_dependencies(onemath_backend_libs_rng ${LIB_NAME})
 
-target_include_directories(
-  ${LIB_OBJ} PRIVATE ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src
-                     ${CMAKE_BINARY_DIR}/bin ${MKL_INCLUDE} ${ONEMKL_GENERATED_INCLUDE_PATH})
+target_include_directories(${LIB_OBJ}
+  PRIVATE ${PROJECT_SOURCE_DIR}/include
+          ${PROJECT_SOURCE_DIR}/src
+          ${CMAKE_BINARY_DIR}/bin
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
+)
 
 target_link_libraries(${LIB_OBJ} PRIVATE roc::rocrand hip::host Threads::Threads)
-target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL)
 target_compile_features(${LIB_OBJ} PUBLIC cxx_std_11)
 set_target_properties(${LIB_OBJ} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
@@ -85,10 +89,10 @@ set_target_properties(${LIB_NAME} PROPERTIES SOVERSION ${PROJECT_VERSION_MAJOR})
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
 install(
   TARGETS ${LIB_NAME}
-  EXPORT oneMKLTargets
+  EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib)
diff --git a/src/rng/backends/rocrand/mrg32k3a.cpp b/src/rng/backends/rocrand/mrg32k3a.cpp
index 424f14caf..741c45432 100644
--- a/src/rng/backends/rocrand/mrg32k3a.cpp
+++ b/src/rng/backends/rocrand/mrg32k3a.cpp
@@ -74,21 +74,21 @@
 
 #include "rocrand_helper.hpp"
 #include "rocrand_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-#include "oneapi/mkl/rng/engines.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/engines.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace rocrand {
 
 #if !defined(_WIN64)
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue),
+            : oneapi::math::rng::detail::engine_impl(queue),
               seed_(seed),
               offset_(0) {
         rocrand_status status;
@@ -97,13 +97,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine",
-                                         "multi-seed unsupported by rocRAND backend");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine",
+                                          "multi-seed unsupported by rocRAND backend");
     }
 
     mrg32k3a_impl(const mrg32k3a_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other),
+            : oneapi::math::rng::detail::engine_impl(*other),
               seed_(other->seed_),
               offset_(other->offset_) {
         rocrand_status status;
@@ -117,12 +117,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r_ptr, n);
                 });
@@ -134,13 +134,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp<float>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r_ptr, n);
                 });
@@ -152,14 +152,14 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp<double>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
         sycl::buffer<std::uint32_t, 1> ib(n);
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = ib.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, r_ptr, n);
                 });
@@ -172,12 +172,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r_ptr, n);
                 });
@@ -189,13 +189,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<float>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r_ptr, n);
                 });
@@ -207,13 +207,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<double>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal, status, engine_, r_ptr, n, distr.mean(),
                                  distr.stddev());
@@ -224,13 +224,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r_ptr, n,
                                  distr.mean(), distr.stddev());
@@ -242,12 +242,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal, status, engine_, r_ptr, n, distr.mean(),
                                  distr.stddev());
@@ -259,12 +259,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r_ptr, n,
                                  distr.mean(), distr.stddev());
@@ -275,13 +275,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r_ptr, n, distr.m(),
                                  distr.s());
@@ -292,13 +292,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r_ptr, n,
                                  distr.m(), distr.s());
@@ -310,12 +310,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r_ptr, n, distr.m(),
                                  distr.s());
@@ -326,13 +326,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r_ptr, n,
                                  distr.m(), distr.s());
@@ -345,14 +345,14 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
     }
@@ -362,7 +362,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::int32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::int32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_poisson, status, engine_, (std::uint32_t*)r_ptr,
                                  n, distr.lambda());
@@ -378,7 +378,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_poisson, status, engine_, r_ptr, n,
                                  distr.lambda());
@@ -394,7 +394,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.template get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, r_ptr, n);
                 });
@@ -407,12 +407,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r, n);
                 });
@@ -425,12 +425,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r, n);
                 });
@@ -443,14 +444,14 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         std::uint32_t* ib = (std::uint32_t*)malloc_device(
             n * sizeof(std::uint32_t), queue_.get_device(), queue_.get_context());
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, ib, n);
                 });
@@ -463,12 +464,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r, n);
                 });
@@ -481,12 +482,13 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r, n);
                 });
@@ -499,12 +501,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -517,12 +519,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -535,11 +537,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -552,11 +554,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -569,12 +571,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -587,12 +589,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -605,11 +607,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -622,11 +624,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -641,7 +644,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
         return sycl::event{};
@@ -650,7 +653,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "mrg32ka engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
         return sycl::event{};
@@ -661,7 +664,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_poisson, status, engine_, (std::uint32_t*)r, n,
                              distr.lambda());
@@ -679,7 +682,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         sycl::event::wait_and_throw(dependencies);
 
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_poisson, status, engine_, r, n, distr.lambda());
             });
@@ -694,7 +697,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
                                  const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate, status, engine_, r, n);
             });
@@ -705,7 +708,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
         return event;
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new mrg32k3a_impl(this);
     }
 
@@ -715,12 +718,12 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "skip_ahead",
-                                         "initializer list unsupported by rocRAND backend");
+        throw oneapi::math::unimplemented("rng", "skip_ahead",
+                                          "initializer list unsupported by rocRAND backend");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog", "unsupported by rocRAND backend");
+        throw oneapi::math::unimplemented("rng", "leapfrog", "unsupported by rocRAND backend");
     }
 
     virtual ~mrg32k3a_impl() override {
@@ -737,290 +740,293 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 };
 #else // rocRAND backend is currently not supported on Windows
-class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl {
+class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     mrg32k3a_impl(sycl::queue queue, std::uint32_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     mrg32k3a_impl(sycl::queue queue, std::initializer_list<std::uint32_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
                           sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return sycl::event{};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
         return nullptr;
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "mrg32ka engine");
+        throw oneapi::math::unimplemented("rng", "mrg32ka engine");
     }
 
     virtual ~mrg32k3a_impl() override {}
 };
 #endif
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
-                                                       std::initializer_list<std::uint32_t> seed) {
+oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue,
+                                                        std::initializer_list<std::uint32_t> seed) {
     return new mrg32k3a_impl(queue, seed);
 }
 
 } // namespace rocrand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/rocrand/philox4x32x10.cpp b/src/rng/backends/rocrand/philox4x32x10.cpp
index 5bc241360..682aac11e 100644
--- a/src/rng/backends/rocrand/philox4x32x10.cpp
+++ b/src/rng/backends/rocrand/philox4x32x10.cpp
@@ -74,13 +74,13 @@
 
 #include "rocrand_helper.hpp"
 #include "rocrand_task.hpp"
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp"
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
-// #include "oneapi/mkl/rng/engines.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
+// #include "oneapi/math/rng/engines.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace rocrand {
 
@@ -103,14 +103,14 @@ namespace rocrand {
  * consumed in other kernels without requiring the random numbers to be written
  * to, and read from, global memory.
  *
- * Here we utilize the host API since this is most aligned with how oneMKL
+ * Here we utilize the host API since this is most aligned with how oneMath
  * generates random numbers.
  *
  */
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue),
+            : oneapi::math::rng::detail::engine_impl(queue),
               seed_(seed),
               offset_(0) {
         rocrand_status status;
@@ -119,13 +119,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine",
-                                         "multi-seed unsupported by rocRAND backend");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine",
+                                          "multi-seed unsupported by rocRAND backend");
     }
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other),
+            : oneapi::math::rng::detail::engine_impl(*other),
               seed_(other->seed_),
               offset_(other->offset_) {
         rocrand_status status;
@@ -139,12 +139,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     // Buffers API
 
     virtual inline void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r_ptr, n);
                 });
@@ -156,13 +156,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp<float>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r_ptr, n);
                 });
@@ -174,14 +174,14 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp<double>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
         sycl::buffer<std::uint32_t, 1> ib(n);
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = ib.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, r_ptr, n);
                 });
@@ -194,12 +194,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r_ptr, n);
                 });
@@ -211,13 +211,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<float>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r_ptr, n);
                 });
@@ -229,13 +229,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         range_transform_fp_accurate<double>(queue_, distr.a(), distr.b(), n, r);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal, status, engine_, r_ptr, n, distr.mean(),
                                  distr.stddev());
@@ -246,13 +246,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r_ptr, n,
                                  distr.mean(), distr.stddev());
@@ -264,12 +264,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal, status, engine_, r_ptr, n, distr.mean(),
                                  distr.stddev());
@@ -281,12 +281,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r_ptr, n,
                                  distr.mean(), distr.stddev());
@@ -297,13 +297,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r_ptr, n, distr.m(),
                                  distr.s());
@@ -314,13 +314,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r_ptr, n,
                                  distr.m(), distr.s());
@@ -332,12 +332,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](float* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r_ptr, n, distr.m(),
                                  distr.s());
@@ -348,13 +348,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         increment_internal_offset(n);
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](double* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r_ptr, n,
                                  distr.m(), distr.s());
@@ -367,14 +367,14 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
     }
@@ -384,7 +384,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::int32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::int32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_poisson, status, engine_, (std::uint32_t*)r_ptr,
                                  n, distr.lambda());
@@ -400,7 +400,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_poisson, status, engine_, r_ptr, n,
                                  distr.lambda());
@@ -416,7 +416,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         queue_
             .submit([&](sycl::handler& cgh) {
                 auto acc = r.template get_access<sycl::access::mode::read_write>(cgh);
-                onemkl_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
+                onemath_rocrand_host_task(cgh, acc, engine_, [=](std::uint32_t* r_ptr) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, r_ptr, n);
                 });
@@ -429,12 +429,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r, n);
                 });
@@ -447,12 +447,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r, n);
                 });
@@ -465,14 +466,14 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         std::uint32_t* ib = (std::uint32_t*)malloc_device(
             n * sizeof(std::uint32_t), queue_.get_device(), queue_.get_context());
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate, status, engine_, ib, n);
                 });
@@ -485,12 +486,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform, status, engine_, r, n);
                 });
@@ -503,12 +504,13 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         queue_
             .submit([&](sycl::handler& cgh) {
-                onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+                onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                     rocrand_status status;
                     ROCRAND_CALL(rocrand_generate_uniform_double, status, engine_, r, n);
                 });
@@ -521,12 +523,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -539,12 +541,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -557,11 +559,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -574,11 +576,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_normal_double, status, engine_, r, n, distr.mean(),
                              distr.stddev());
@@ -591,12 +593,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -609,12 +611,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -627,11 +629,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -644,11 +646,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_log_normal_double, status, engine_, r, n, distr.m(),
                              distr.s());
@@ -663,7 +666,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
         return sycl::event{};
@@ -672,7 +675,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "rng", "philox4x32x10 engine",
             "Bernoulli distribution method unsupported by rocRAND backend");
         return sycl::event{};
@@ -683,7 +686,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_poisson, status, engine_, (std::uint32_t*)r, n,
                              distr.lambda());
@@ -700,7 +703,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate_poisson, status, engine_, r, n, distr.lambda());
             });
@@ -715,7 +718,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
                                  const std::vector<sycl::event>& dependencies) override {
         sycl::event::wait_and_throw(dependencies);
         auto event = queue_.submit([&](sycl::handler& cgh) {
-            onemkl_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
+            onemath_rocrand_host_task(cgh, engine_, [=](sycl::interop_handle ih) {
                 rocrand_status status;
                 ROCRAND_CALL(rocrand_generate, status, engine_, r, n);
             });
@@ -726,7 +729,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
         return event;
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
         return new philox4x32x10_impl(this);
     }
 
@@ -736,12 +739,12 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "skip_ahead",
-                                         "initializer list unsupported by rocRAND backend");
+        throw oneapi::math::unimplemented("rng", "skip_ahead",
+                                          "initializer list unsupported by rocRAND backend");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "leapfrog", "unsupported by rocRAND backend");
+        throw oneapi::math::unimplemented("rng", "leapfrog", "unsupported by rocRAND backend");
     }
 
     virtual ~philox4x32x10_impl() override {
@@ -758,291 +761,295 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
     }
 };
 #else // rocRAND backend is currently not supported on Windows
-class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl {
+class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl {
 public:
     philox4x32x10_impl(sycl::queue queue, std::uint64_t seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     philox4x32x10_impl(sycl::queue queue, std::initializer_list<std::uint64_t> seed)
-            : oneapi::mkl::rng::detail::engine_impl(queue) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(queue) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     philox4x32x10_impl(const philox4x32x10_impl* other)
-            : oneapi::mkl::rng::detail::engine_impl(*other) {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+            : oneapi::math::rng::detail::engine_impl(*other) {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     // Buffers API
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::standard>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::uniform<
-                              std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr,
+    virtual void generate(const oneapi::math::rng::uniform<
+                              std::int32_t, oneapi::math::rng::uniform_method::standard>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::uniform<
+                              double, oneapi::math::rng::uniform_method::accurate>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              float, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::gaussian<
-                              double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::gaussian<
+                              double, oneapi::math::rng::gaussian_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              float, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(const oneapi::mkl::rng::lognormal<
-                              double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr,
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::box_muller2>& distr,
                           std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, sycl::buffer<float, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
-    virtual void generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
-        std::int64_t n, sycl::buffer<double, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual void generate(const oneapi::math::rng::lognormal<
+                              double, oneapi::math::rng::lognormal_method::icdf>& distr,
+                          std::int64_t n, sycl::buffer<double, 1>& r) override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::int32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr,
                           std::int64_t n, sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void generate(const bits<std::uint32_t>& distr, std::int64_t n,
                           sycl::buffer<std::uint32_t, 1>& r) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     // USM APIs
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>&
+        const oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>&
             distr,
         std::int64_t n, std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>& distr,
+        const oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>&
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>&
             distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>& distr,
+        const oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>&
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>&
             distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>&
-            distr,
+        const oneapi::math::rng::lognormal<double,
+                                           oneapi::math::rng::lognormal_method::box_muller2>& distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>& distr,
         std::int64_t n, float* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
-        const oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>& distr,
+        const oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>&
+            distr,
         std::int64_t n, double* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::int32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::int32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bernoulli<std::uint32_t, bernoulli_method::icdf>& distr,
                                  std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::int32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::int32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(
         const poisson<std::uint32_t, poisson_method::gaussian_icdf_based>& distr, std::int64_t n,
         std::uint32_t* r, const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
     virtual sycl::event generate(const bits<std::uint32_t>& distr, std::int64_t n, std::uint32_t* r,
                                  const std::vector<sycl::event>& dependencies) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return sycl::event{};
     }
 
-    virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+    virtual oneapi::math::rng::detail::engine_impl* copy_state() override {
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
         return nullptr;
     }
 
     virtual void skip_ahead(std::uint64_t num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override {
-        throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine");
+        throw oneapi::math::unimplemented("rng", "philox4x32x10 engine");
     }
 
     virtual ~philox4x32x10_impl() override {}
 };
 #endif
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) {
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue,
+                                                             std::uint64_t seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
-oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(
+oneapi::math::rng::detail::engine_impl* create_philox4x32x10(
     sycl::queue queue, std::initializer_list<std::uint64_t> seed) {
     return new philox4x32x10_impl(queue, seed);
 }
 
 } // namespace rocrand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/rng/backends/rocrand/rocrand_helper.hpp b/src/rng/backends/rocrand/rocrand_helper.hpp
index 205429ee8..594e8e122 100644
--- a/src/rng/backends/rocrand/rocrand_helper.hpp
+++ b/src/rng/backends/rocrand/rocrand_helper.hpp
@@ -62,19 +62,19 @@
  * @file rocrand_helper.cpp : contains the implementation of all the routines
  * for HIP backend
  */
-#ifndef _MKL_RNG_ROCRAND_HELPER_HPP_
-#define _MKL_RNG_ROCRAND_HELPER_HPP_
+#ifndef ONEMATH_RNG_ROCRAND_HELPER_HPP_
+#define ONEMATH_RNG_ROCRAND_HELPER_HPP_
 
 #include <rocrand.h>
 #include <complex>
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace rocrand {
 
-// Static template functions oneapi::mkl::rng::rocrand::range_transform_fp for
+// Static template functions oneapi::math::rng::rocrand::range_transform_fp for
 // Buffer and USM APIs
 //
 // rocRAND has no built-in functionality to specify a custom range for sampling
@@ -138,7 +138,7 @@ static inline sycl::event range_transform_fp_accurate(sycl::queue& queue, T a, T
     });
 }
 
-// Static template functions oneapi::mkl::rng::rocrand::range_transform_int for
+// Static template functions oneapi::math::rng::rocrand::range_transform_int for
 // Buffer and USM APIs
 //
 // rocRAND has no built-in functionality to specify a custom range for sampling
@@ -173,7 +173,7 @@ inline sycl::event range_transform_int(sycl::queue& queue, T a, T b, std::int64_
     });
 }
 
-// Static template functions oneapi::mkl::rng::rocrand::sample_bernoulli for
+// Static template functions oneapi::math::rng::rocrand::sample_bernoulli for
 // Buffer and USM APIs
 //
 // rocRAND has no built-in functionality to sample from a Bernoulli distribution.
@@ -329,7 +329,7 @@ class rocm_error : virtual public std::runtime_error {
 
 } // namespace rocrand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
-#endif // _MKL_RNG_ROCRAND_HELPER_HPP_
+#endif // ONEMATH_RNG_ROCRAND_HELPER_HPP_
diff --git a/src/rng/backends/rocrand/rocrand_task.hpp b/src/rng/backends/rocrand/rocrand_task.hpp
index a3e5e375e..d7ece6c45 100644
--- a/src/rng/backends/rocrand/rocrand_task.hpp
+++ b/src/rng/backends/rocrand/rocrand_task.hpp
@@ -1,5 +1,5 @@
-#ifndef _MKL_RNG_ROCRAND_TASK_HPP_
-#define _MKL_RNG_ROCRAND_TASK_HPP_
+#ifndef ONEMATH_RNG_ROCRAND_TASK_HPP_
+#define ONEMATH_RNG_ROCRAND_TASK_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -10,7 +10,7 @@
 #include "rocrand_helper.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace rocrand {
 #ifdef __HIPSYCL__
@@ -73,18 +73,18 @@ static inline void host_task_internal(H& cgh, E e, F f) {
 }
 #endif
 template <typename H, typename A, typename E, typename F>
-static inline void onemkl_rocrand_host_task(H& cgh, A acc, E e, F f) {
+static inline void onemath_rocrand_host_task(H& cgh, A acc, E e, F f) {
     host_task_internal(cgh, acc, e, f);
 }
 
 template <typename H, typename Engine, typename F>
-static inline void onemkl_rocrand_host_task(H& cgh, Engine e, F f) {
+static inline void onemath_rocrand_host_task(H& cgh, Engine e, F f) {
     host_task_internal(cgh, e, f);
 }
 
 } // namespace rocrand
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif
diff --git a/src/rng/backends/rocrand/mkl_rng_rocrand_wrappers.cpp b/src/rng/backends/rocrand/rocrand_wrappers.cpp
similarity index 90%
rename from src/rng/backends/rocrand/mkl_rng_rocrand_wrappers.cpp
rename to src/rng/backends/rocrand/rocrand_wrappers.cpp
index 5450f47b4..365f1b99e 100644
--- a/src/rng/backends/rocrand/mkl_rng_rocrand_wrappers.cpp
+++ b/src/rng/backends/rocrand/rocrand_wrappers.cpp
@@ -58,13 +58,13 @@
  * so.
  ******************************************************************************/
 
-#include "oneapi/mkl/rng/detail/rocrand/onemkl_rng_rocrand.hpp"
+#include "oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp"
 #include "rng/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 
-extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = {
-    WRAPPER_VERSION, oneapi::mkl::rng::rocrand::create_philox4x32x10,
-    oneapi::mkl::rng::rocrand::create_philox4x32x10, oneapi::mkl::rng::rocrand::create_mrg32k3a,
-    oneapi::mkl::rng::rocrand::create_mrg32k3a
+extern "C" ONEMATH_EXPORT rng_function_table_t onemath_rng_table = {
+    WRAPPER_VERSION, oneapi::math::rng::rocrand::create_philox4x32x10,
+    oneapi::math::rng::rocrand::create_philox4x32x10, oneapi::math::rng::rocrand::create_mrg32k3a,
+    oneapi::math::rng::rocrand::create_mrg32k3a
 };
diff --git a/src/rng/function_table.hpp b/src/rng/function_table.hpp
index c94757250..06c9c83e1 100644
--- a/src/rng/function_table.hpp
+++ b/src/rng/function_table.hpp
@@ -27,19 +27,19 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/rng/detail/engine_impl.hpp"
+#include "oneapi/math/rng/detail/engine_impl.hpp"
 
 typedef struct {
     int version;
 
-    oneapi::mkl::rng::detail::engine_impl* (*create_philox4x32x10_sycl)(sycl::queue queue,
-                                                                        std::uint64_t seed);
-    oneapi::mkl::rng::detail::engine_impl* (*create_philox4x32x10_ex_sycl)(
+    oneapi::math::rng::detail::engine_impl* (*create_philox4x32x10_sycl)(sycl::queue queue,
+                                                                         std::uint64_t seed);
+    oneapi::math::rng::detail::engine_impl* (*create_philox4x32x10_ex_sycl)(
         sycl::queue queue, std::initializer_list<std::uint64_t> seed);
 
-    oneapi::mkl::rng::detail::engine_impl* (*create_mrg32k3a_sycl)(sycl::queue queue,
-                                                                   std::uint32_t seed);
-    oneapi::mkl::rng::detail::engine_impl* (*create_mrg32k3a_ex_sycl)(
+    oneapi::math::rng::detail::engine_impl* (*create_mrg32k3a_sycl)(sycl::queue queue,
+                                                                    std::uint32_t seed);
+    oneapi::math::rng::detail::engine_impl* (*create_mrg32k3a_ex_sycl)(
         sycl::queue queue, std::initializer_list<std::uint32_t> seed);
 } rng_function_table_t;
 
diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp
index 68e3a5ba5..3bd78f522 100644
--- a/src/rng/rng_loader.cpp
+++ b/src/rng/rng_loader.cpp
@@ -17,38 +17,38 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#include "oneapi/mkl/rng/detail/rng_loader.hpp"
+#include "oneapi/math/rng/detail/rng_loader.hpp"
 
 #include "function_table_initializer.hpp"
 #include "rng/function_table.hpp"
 
 namespace oneapi {
-namespace mkl {
+namespace math {
 namespace rng {
 namespace detail {
 
-static oneapi::mkl::detail::table_initializer<domain::rng, rng_function_table_t> function_tables;
+static oneapi::math::detail::table_initializer<domain::rng, rng_function_table_t> function_tables;
 
-engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue,
+engine_impl* create_philox4x32x10(oneapi::math::device libkey, sycl::queue queue,
                                   std::uint64_t seed) {
     return function_tables[{ libkey, queue }].create_philox4x32x10_sycl(queue, seed);
 }
 
-engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, sycl::queue queue,
+engine_impl* create_philox4x32x10(oneapi::math::device libkey, sycl::queue queue,
                                   std::initializer_list<std::uint64_t> seed) {
     return function_tables[{ libkey, queue }].create_philox4x32x10_ex_sycl(queue, seed);
 }
 
-engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue, std::uint32_t seed) {
+engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue, std::uint32_t seed) {
     return function_tables[{ libkey, queue }].create_mrg32k3a_sycl(queue, seed);
 }
 
-engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, sycl::queue queue,
+engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue,
                              std::initializer_list<std::uint32_t> seed) {
     return function_tables[{ libkey, queue }].create_mrg32k3a_ex_sycl(queue, seed);
 }
 
 } // namespace detail
 } // namespace rng
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
diff --git a/src/sparse_blas/CMakeLists.txt b/src/sparse_blas/CMakeLists.txt
index b01cc63fd..a2f99a7d3 100644
--- a/src/sparse_blas/CMakeLists.txt
+++ b/src/sparse_blas/CMakeLists.txt
@@ -20,29 +20,30 @@
 add_subdirectory(backends)
 
 if(BUILD_SHARED_LIBS)
-  add_library(onemkl_sparse_blas OBJECT)
-  target_sources(onemkl_sparse_blas PRIVATE sparse_blas_loader.cpp)
-  target_include_directories(onemkl_sparse_blas
+  add_library(onemath_sparse_blas OBJECT)
+  add_deprecated_library(onemath_sparse_blas)
+  target_sources(onemath_sparse_blas PRIVATE sparse_blas_loader.cpp)
+  target_include_directories(onemath_sparse_blas
     PRIVATE ${PROJECT_SOURCE_DIR}/include
             ${PROJECT_SOURCE_DIR}/src
             ${PROJECT_SOURCE_DIR}/src/include
             ${CMAKE_BINARY_DIR}/bin
-            ${ONEMKL_GENERATED_INCLUDE_PATH}
-            $<TARGET_FILE_DIR:onemkl>
+            ${ONEMATH_GENERATED_INCLUDE_PATH}
+            $<TARGET_FILE_DIR:onemath>
   )
 
-  target_compile_options(onemkl_sparse_blas PRIVATE ${ONEMKL_BUILD_COPT})
+  target_compile_options(onemath_sparse_blas PRIVATE ${ONEMATH_BUILD_COPT})
 
-  set_target_properties(onemkl_sparse_blas PROPERTIES
+  set_target_properties(onemath_sparse_blas PROPERTIES
     POSITION_INDEPENDENT_CODE ON
   )
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
-    add_sycl_to_target(TARGET onemkl_sparse_blas SOURCES sparse_blas_loader.cpp)
+    add_sycl_to_target(TARGET onemath_sparse_blas SOURCES sparse_blas_loader.cpp)
   else()
-    target_link_libraries(onemkl_sparse_blas PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(onemath_sparse_blas PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 
   include(WarningsUtils)
-  target_link_libraries(onemkl_sparse_blas PRIVATE onemkl_warnings)
+  target_link_libraries(onemath_sparse_blas PRIVATE onemath_warnings)
 
 endif()
diff --git a/src/sparse_blas/backends/CMakeLists.txt b/src/sparse_blas/backends/CMakeLists.txt
index baae9445d..405c79ce7 100644
--- a/src/sparse_blas/backends/CMakeLists.txt
+++ b/src/sparse_blas/backends/CMakeLists.txt
@@ -17,8 +17,8 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-add_custom_target(onemkl_backend_libs_sparse_blas)
-add_dependencies(onemkl_backend_libs onemkl_backend_libs_sparse_blas)
+add_custom_target(onemath_backend_libs_sparse_blas)
+add_dependencies(onemath_backend_libs onemath_backend_libs_sparse_blas)
 
 if(ENABLE_MKLCPU_BACKEND)
   add_subdirectory(mklcpu)
diff --git a/src/sparse_blas/backends/backend_wrappers.cxx b/src/sparse_blas/backends/backend_wrappers.cxx
index fff743e82..07a0e54fe 100644
--- a/src/sparse_blas/backends/backend_wrappers.cxx
+++ b/src/sparse_blas/backends/backend_wrappers.cxx
@@ -26,7 +26,7 @@ To use this:
 #define WRAPPER_VERSION <Wrapper version number>
 #define BACKEND         <Backend name eg. mklgpu>
 
-extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = {
+extern "C" sparse_blas_function_table_t onemath_sparse_blas_table = {
     WRAPPER_VERSION,
 #include "sparse_blas/backends/backend_wrappers.cxx"
 };
@@ -48,68 +48,68 @@ function template instantiations must be added to backend_sparse_blas_instantiat
 // clang-format off
 // Dense vector
 #define LIST_DENSE_VECTOR_FUNCS() \
-oneapi::mkl::sparse::BACKEND::init_dense_vector, \
-oneapi::mkl::sparse::BACKEND::init_dense_vector, \
-oneapi::mkl::sparse::BACKEND::set_dense_vector_data, \
-oneapi::mkl::sparse::BACKEND::set_dense_vector_data,
+oneapi::math::sparse::BACKEND::init_dense_vector, \
+oneapi::math::sparse::BACKEND::init_dense_vector, \
+oneapi::math::sparse::BACKEND::set_dense_vector_data, \
+oneapi::math::sparse::BACKEND::set_dense_vector_data,
 REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_VECTOR_FUNCS)
 #undef LIST_DENSE_VECTOR_FUNCS
-oneapi::mkl::sparse::BACKEND::release_dense_vector,
+oneapi::math::sparse::BACKEND::release_dense_vector,
 
 // Dense matrix
 #define LIST_DENSE_MATRIX_FUNCS() \
-oneapi::mkl::sparse::BACKEND::init_dense_matrix, \
-oneapi::mkl::sparse::BACKEND::init_dense_matrix, \
-oneapi::mkl::sparse::BACKEND::set_dense_matrix_data, \
-oneapi::mkl::sparse::BACKEND::set_dense_matrix_data,
+oneapi::math::sparse::BACKEND::init_dense_matrix, \
+oneapi::math::sparse::BACKEND::init_dense_matrix, \
+oneapi::math::sparse::BACKEND::set_dense_matrix_data, \
+oneapi::math::sparse::BACKEND::set_dense_matrix_data,
 REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_MATRIX_FUNCS)
 #undef LIST_DENSE_MATRIX_FUNCS
-oneapi::mkl::sparse::BACKEND::release_dense_matrix,
+oneapi::math::sparse::BACKEND::release_dense_matrix,
 
 // COO matrix
 #define LIST_COO_MATRIX_FUNCS() \
-oneapi::mkl::sparse::BACKEND::init_coo_matrix, \
-oneapi::mkl::sparse::BACKEND::init_coo_matrix, \
-oneapi::mkl::sparse::BACKEND::set_coo_matrix_data, \
-oneapi::mkl::sparse::BACKEND::set_coo_matrix_data,
+oneapi::math::sparse::BACKEND::init_coo_matrix, \
+oneapi::math::sparse::BACKEND::init_coo_matrix, \
+oneapi::math::sparse::BACKEND::set_coo_matrix_data, \
+oneapi::math::sparse::BACKEND::set_coo_matrix_data,
 REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_COO_MATRIX_FUNCS)
 #undef LIST_COO_MATRIX_FUNCS
 
 // CSR matrix
 #define LIST_CSR_MATRIX_FUNCS() \
-oneapi::mkl::sparse::BACKEND::init_csr_matrix, \
-oneapi::mkl::sparse::BACKEND::init_csr_matrix, \
-oneapi::mkl::sparse::BACKEND::set_csr_matrix_data, \
-oneapi::mkl::sparse::BACKEND::set_csr_matrix_data,
+oneapi::math::sparse::BACKEND::init_csr_matrix, \
+oneapi::math::sparse::BACKEND::init_csr_matrix, \
+oneapi::math::sparse::BACKEND::set_csr_matrix_data, \
+oneapi::math::sparse::BACKEND::set_csr_matrix_data,
 REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_CSR_MATRIX_FUNCS)
 #undef LIST_CSR_MATRIX_FUNCS
 
 // Common sparse matrix functions
-oneapi::mkl::sparse::BACKEND::release_sparse_matrix,
-oneapi::mkl::sparse::BACKEND::set_matrix_property,
+oneapi::math::sparse::BACKEND::release_sparse_matrix,
+oneapi::math::sparse::BACKEND::set_matrix_property,
 
 // SPMM
-oneapi::mkl::sparse::BACKEND::init_spmm_descr,
-oneapi::mkl::sparse::BACKEND::release_spmm_descr,
-oneapi::mkl::sparse::BACKEND::spmm_buffer_size,
-oneapi::mkl::sparse::BACKEND::spmm_optimize,
-oneapi::mkl::sparse::BACKEND::spmm_optimize,
-oneapi::mkl::sparse::BACKEND::spmm,
+oneapi::math::sparse::BACKEND::init_spmm_descr,
+oneapi::math::sparse::BACKEND::release_spmm_descr,
+oneapi::math::sparse::BACKEND::spmm_buffer_size,
+oneapi::math::sparse::BACKEND::spmm_optimize,
+oneapi::math::sparse::BACKEND::spmm_optimize,
+oneapi::math::sparse::BACKEND::spmm,
 
 // SPMV
-oneapi::mkl::sparse::BACKEND::init_spmv_descr,
-oneapi::mkl::sparse::BACKEND::release_spmv_descr,
-oneapi::mkl::sparse::BACKEND::spmv_buffer_size,
-oneapi::mkl::sparse::BACKEND::spmv_optimize,
-oneapi::mkl::sparse::BACKEND::spmv_optimize,
-oneapi::mkl::sparse::BACKEND::spmv,
+oneapi::math::sparse::BACKEND::init_spmv_descr,
+oneapi::math::sparse::BACKEND::release_spmv_descr,
+oneapi::math::sparse::BACKEND::spmv_buffer_size,
+oneapi::math::sparse::BACKEND::spmv_optimize,
+oneapi::math::sparse::BACKEND::spmv_optimize,
+oneapi::math::sparse::BACKEND::spmv,
 
 // SPSV
-oneapi::mkl::sparse::BACKEND::init_spsv_descr,
-oneapi::mkl::sparse::BACKEND::release_spsv_descr,
-oneapi::mkl::sparse::BACKEND::spsv_buffer_size,
-oneapi::mkl::sparse::BACKEND::spsv_optimize,
-oneapi::mkl::sparse::BACKEND::spsv_optimize,
-oneapi::mkl::sparse::BACKEND::spsv,
+oneapi::math::sparse::BACKEND::init_spsv_descr,
+oneapi::math::sparse::BACKEND::release_spsv_descr,
+oneapi::math::sparse::BACKEND::spsv_buffer_size,
+oneapi::math::sparse::BACKEND::spsv_optimize,
+oneapi::math::sparse::BACKEND::spsv_optimize,
+oneapi::math::sparse::BACKEND::spsv,
 
     // clang-format on
diff --git a/src/sparse_blas/backends/cusparse/CMakeLists.txt b/src/sparse_blas/backends/cusparse/CMakeLists.txt
index 60bbaf35f..636202868 100644
--- a/src/sparse_blas/backends/cusparse/CMakeLists.txt
+++ b/src/sparse_blas/backends/cusparse/CMakeLists.txt
@@ -17,7 +17,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_sparse_blas_cusparse)
+set(LIB_NAME onemath_sparse_blas_cusparse)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 include(WarningsUtils)
@@ -31,16 +31,16 @@ add_library(${LIB_OBJ} OBJECT
   operations/cusparse_spsv.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: cusparse_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_sparse_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if (${CMAKE_VERSION} VERSION_LESS "3.17.0")
   find_package(CUDA 12.2 REQUIRED)
@@ -52,8 +52,8 @@ else()
 endif()
 
 target_link_libraries(${LIB_OBJ}
-  PUBLIC ONEMKL::SYCL::SYCL
-  PRIVATE onemkl_warnings
+  PUBLIC ONEMATH::SYCL::SYCL
+  PRIVATE onemath_warnings
 )
 
 set_target_properties(${LIB_OBJ} PROPERTIES
@@ -61,10 +61,10 @@ set_target_properties(${LIB_OBJ} PROPERTIES
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+#Set oneMath libraries as not transitive for dynamic
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -77,8 +77,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/sparse_blas/backends/cusparse/cusparse_error.hpp b/src/sparse_blas/backends/cusparse/cusparse_error.hpp
index 738888576..54b977db6 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_error.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_error.hpp
@@ -17,57 +17,57 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
-#define _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
+#define _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
 
 #include <string>
 
 #include <cuda.h>
 #include <cusparse.h>
 
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
 inline std::string cuda_result_to_str(CUresult result) {
     switch (result) {
-#define ONEMKL_CUSPARSE_CASE(STATUS) \
+#define ONEMATH_CUSPARSE_CASE(STATUS) \
     case STATUS: return #STATUS
-        ONEMKL_CUSPARSE_CASE(CUDA_SUCCESS);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_NOT_PERMITTED);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_INVALID_CONTEXT);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_INVALID_DEVICE);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_INVALID_VALUE);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_OUT_OF_MEMORY);
-        ONEMKL_CUSPARSE_CASE(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES);
+        ONEMATH_CUSPARSE_CASE(CUDA_SUCCESS);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_NOT_PERMITTED);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_INVALID_CONTEXT);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_INVALID_DEVICE);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_INVALID_VALUE);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_OUT_OF_MEMORY);
+        ONEMATH_CUSPARSE_CASE(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES);
         default: return "<unknown>";
     }
 }
 
-#define CUDA_ERROR_FUNC(func, ...)                                                          \
-    do {                                                                                    \
-        auto res = func(__VA_ARGS__);                                                       \
-        if (res != CUDA_SUCCESS) {                                                          \
-            throw oneapi::mkl::exception("sparse_blas", #func,                              \
-                                         "cuda error: " + detail::cuda_result_to_str(res)); \
-        }                                                                                   \
+#define CUDA_ERROR_FUNC(func, ...)                                                           \
+    do {                                                                                     \
+        auto res = func(__VA_ARGS__);                                                        \
+        if (res != CUDA_SUCCESS) {                                                           \
+            throw oneapi::math::exception("sparse_blas", #func,                              \
+                                          "cuda error: " + detail::cuda_result_to_str(res)); \
+        }                                                                                    \
     } while (0)
 
 inline std::string cusparse_status_to_str(cusparseStatus_t status) {
     switch (status) {
-#define ONEMKL_CUSPARSE_CASE(STATUS) \
+#define ONEMATH_CUSPARSE_CASE(STATUS) \
     case STATUS: return #STATUS
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_SUCCESS);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_INITIALIZED);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_ALLOC_FAILED);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_INVALID_VALUE);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_ARCH_MISMATCH);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_EXECUTION_FAILED);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_INTERNAL_ERROR);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_SUPPORTED);
-        ONEMKL_CUSPARSE_CASE(CUSPARSE_STATUS_INSUFFICIENT_RESOURCES);
-#undef ONEMKL_CUSPARSE_CASE
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_SUCCESS);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_INITIALIZED);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_ALLOC_FAILED);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_INVALID_VALUE);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_ARCH_MISMATCH);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_EXECUTION_FAILED);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_INTERNAL_ERROR);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_SUPPORTED);
+        ONEMATH_CUSPARSE_CASE(CUSPARSE_STATUS_INSUFFICIENT_RESOURCES);
+#undef ONEMATH_CUSPARSE_CASE
         default: return "<unknown>";
     }
 }
@@ -81,13 +81,13 @@ inline void check_status(cusparseStatus_t status, const std::string& function,
         error_str += "cuSPARSE status: " + cusparse_status_to_str(status);
         switch (status) {
             case CUSPARSE_STATUS_NOT_SUPPORTED:
-                throw oneapi::mkl::unimplemented("sparse_blas", function, error_str);
+                throw oneapi::math::unimplemented("sparse_blas", function, error_str);
             case CUSPARSE_STATUS_NOT_INITIALIZED:
-                throw oneapi::mkl::uninitialized("sparse_blas", function, error_str);
+                throw oneapi::math::uninitialized("sparse_blas", function, error_str);
             case CUSPARSE_STATUS_INVALID_VALUE:
             case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-                throw oneapi::mkl::invalid_argument("sparse_blas", function, error_str);
-            default: throw oneapi::mkl::exception("sparse_blas", function, error_str);
+                throw oneapi::math::invalid_argument("sparse_blas", function, error_str);
+            default: throw oneapi::math::exception("sparse_blas", function, error_str);
         }
     }
 }
@@ -98,6 +98,6 @@ inline void check_status(cusparseStatus_t status, const std::string& function,
         detail::check_status(status, #func); \
     } while (0)
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
 
-#endif // _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_ERROR_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_global_handle.hpp b/src/sparse_blas/backends/cusparse/cusparse_global_handle.hpp
index 179b007f5..ea9db44c0 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_global_handle.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_global_handle.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
-#define _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
+#define _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
 
 /**
  * @file Similar to blas_handle.hpp
@@ -29,7 +29,7 @@
 #include <atomic>
 #include <unordered_map>
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
 template <typename T>
 struct cusparse_global_handle {
@@ -58,6 +58,6 @@ struct cusparse_global_handle {
     }
 };
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
 
-#endif // _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_GLOBAL_HANDLE_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_handles.cpp b/src/sparse_blas/backends/cusparse/cusparse_handles.cpp
index ff3d8fcae..95a615b05 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_handles.cpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_handles.cpp
@@ -17,7 +17,7 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp"
+#include "oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp"
 
 #include "cusparse_error.hpp"
 #include "cusparse_helper.hpp"
@@ -25,7 +25,7 @@
 #include "cusparse_task.hpp"
 #include "sparse_blas/macros.hpp"
 
-namespace oneapi::mkl::sparse::cusparse {
+namespace oneapi::math::sparse::cusparse {
 
 /**
  * In this file CusparseScopedContextHandler are used to ensure that a cusparseHandle_t is created before any other cuSPARSE call, as required by the specification.
@@ -163,7 +163,7 @@ void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, st
 template <typename fpType>
 void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
                            std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                           oneapi::mkl::layout dense_layout, sycl::buffer<fpType, 1> val) {
+                           oneapi::math::layout dense_layout, sycl::buffer<fpType, 1> val) {
     detail::check_can_reset_value_handle<fpType>(__func__, dmhandle, true);
     auto event = queue.submit([&](sycl::handler& cgh) {
         auto acc = val.template get_access<sycl::access::mode::read_write>(cgh);
@@ -193,8 +193,8 @@ void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle,
 
 template <typename fpType>
 void set_dense_matrix_data(sycl::queue&, dense_matrix_handle_t dmhandle, std::int64_t num_rows,
-                           std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout,
-                           fpType* val) {
+                           std::int64_t num_cols, std::int64_t ld,
+                           oneapi::math::layout dense_layout, fpType* val) {
     detail::check_can_reset_value_handle<fpType>(__func__, dmhandle, false);
     if (dmhandle->num_rows != num_rows || dmhandle->num_cols != num_cols || dmhandle->ld != ld ||
         dmhandle->dense_layout != dense_layout) {
@@ -229,7 +229,7 @@ sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhan
 // COO matrix
 template <typename fpType, typename intType>
 void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows,
-                     std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                     std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                      sycl::buffer<intType, 1> row_ind, sycl::buffer<intType, 1> col_ind,
                      sycl::buffer<fpType, 1> val) {
     auto event = queue.submit([&](sycl::handler& cgh) {
@@ -257,7 +257,7 @@ void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64
 
 template <typename fpType, typename intType>
 void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows,
-                     std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                     std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                      intType* row_ind, intType* col_ind, fpType* val) {
     auto event = queue.submit([&](sycl::handler& cgh) {
         detail::submit_host_task(cgh, queue, [=](sycl::interop_handle ih) {
@@ -279,7 +279,7 @@ void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64
 
 template <typename fpType, typename intType>
 void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows,
-                         std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                         std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                          sycl::buffer<intType, 1> row_ind, sycl::buffer<intType, 1> col_ind,
                          sycl::buffer<fpType, 1> val) {
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, smhandle, true);
@@ -318,7 +318,7 @@ void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int6
 
 template <typename fpType, typename intType>
 void set_coo_matrix_data(sycl::queue&, matrix_handle_t smhandle, std::int64_t num_rows,
-                         std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                         std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                          intType* row_ind, intType* col_ind, fpType* val) {
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, smhandle, false);
     if (smhandle->num_rows != num_rows || smhandle->num_cols != num_cols || smhandle->nnz != nnz ||
@@ -347,7 +347,7 @@ FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS);
 // CSR matrix
 template <typename fpType, typename intType>
 void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows,
-                     std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                     std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                      sycl::buffer<intType, 1> row_ptr, sycl::buffer<intType, 1> col_ind,
                      sycl::buffer<fpType, 1> val) {
     auto event = queue.submit([&](sycl::handler& cgh) {
@@ -375,7 +375,7 @@ void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64
 
 template <typename fpType, typename intType>
 void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows,
-                     std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                     std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                      intType* row_ptr, intType* col_ind, fpType* val) {
     auto event = queue.submit([&](sycl::handler& cgh) {
         detail::submit_host_task(cgh, queue, [=](sycl::interop_handle ih) {
@@ -398,7 +398,7 @@ void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64
 
 template <typename fpType, typename intType>
 void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows,
-                         std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                         std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                          sycl::buffer<intType, 1> row_ptr, sycl::buffer<intType, 1> col_ind,
                          sycl::buffer<fpType, 1> val) {
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, smhandle, true);
@@ -437,7 +437,7 @@ void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int6
 
 template <typename fpType, typename intType>
 void set_csr_matrix_data(sycl::queue&, matrix_handle_t smhandle, std::int64_t num_rows,
-                         std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                         std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                          intType* row_ptr, intType* col_ind, fpType* val) {
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, smhandle, false);
     if (smhandle->num_rows != num_rows || smhandle->num_cols != num_cols || smhandle->nnz != nnz ||
@@ -482,4 +482,4 @@ bool set_matrix_property(sycl::queue&, matrix_handle_t smhandle, matrix_property
     return false;
 }
 
-} // namespace oneapi::mkl::sparse::cusparse
+} // namespace oneapi::math::sparse::cusparse
diff --git a/src/sparse_blas/backends/cusparse/cusparse_handles.hpp b/src/sparse_blas/backends/cusparse/cusparse_handles.hpp
index 5e5bdc732..3298f7c7f 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_handles.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_handles.hpp
@@ -17,14 +17,14 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
 
 #include <cusparse.h>
 
 #include "sparse_blas/generic_container.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Complete the definition of incomplete types dense_vector_handle, dense_matrix_handle and matrix_handle.
 
@@ -60,7 +60,7 @@ struct matrix_handle : public detail::generic_sparse_handle<cusparseSpMatDescr_t
     template <typename fpType, typename intType>
     matrix_handle(cusparseSpMatDescr_t cu_descr, intType* row_ptr, intType* col_ptr,
                   fpType* value_ptr, detail::sparse_format format, std::int64_t num_rows,
-                  std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index)
+                  std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index)
             : detail::generic_sparse_handle<cusparseSpMatDescr_t>(
                   cu_descr, row_ptr, col_ptr, value_ptr, format, num_rows, num_cols, nnz, index) {}
 
@@ -69,7 +69,7 @@ struct matrix_handle : public detail::generic_sparse_handle<cusparseSpMatDescr_t
                   const sycl::buffer<intType, 1> col_buffer,
                   const sycl::buffer<fpType, 1> value_buffer, detail::sparse_format format,
                   std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                  oneapi::mkl::index_base index)
+                  oneapi::math::index_base index)
             : detail::generic_sparse_handle<cusparseSpMatDescr_t>(cu_descr, row_buffer, col_buffer,
                                                                   value_buffer, format, num_rows,
                                                                   num_cols, nnz, index) {}
@@ -82,7 +82,7 @@ inline void check_valid_matrix_properties(const std::string& function_name,
     if (sm_handle->format == sparse_format::COO &&
         !(sm_handle->has_matrix_property(matrix_property::sorted_by_rows) ||
           sm_handle->has_matrix_property(matrix_property::sorted))) {
-        throw mkl::unimplemented(
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support unsorted COO format. Use `set_matrix_property` to set the property `matrix_property::sorted_by_rows` or `matrix_property::sorted`");
     }
@@ -90,6 +90,6 @@ inline void check_valid_matrix_properties(const std::string& function_name,
 
 } // namespace detail
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_CUSPARSE_HANDLES_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_helper.hpp b/src/sparse_blas/backends/cusparse/cusparse_helper.hpp
index 3feb4bcad..6f1411551 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_helper.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_helper.hpp
@@ -16,8 +16,8 @@
 *  limitations under the License.
 *
 **************************************************************************/
-#ifndef _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
-#define _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
+#define _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
 
 #include <complex>
 #include <cstdint>
@@ -26,14 +26,14 @@
 
 #include <cusparse.h>
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 #include "sparse_blas/enum_data_types.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 #include "cusparse_error.hpp"
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
-using namespace oneapi::mkl::sparse::detail;
+using namespace oneapi::math::sparse::detail;
 
 template <typename T>
 struct CudaEnumType;
@@ -70,16 +70,16 @@ inline std::string cast_enum_to_str(E e) {
     return std::to_string(static_cast<char>(e));
 }
 
-inline cudaDataType_t get_cuda_value_type(data_type onemkl_data_type) {
-    switch (onemkl_data_type) {
+inline cudaDataType_t get_cuda_value_type(data_type onemath_data_type) {
+    switch (onemath_data_type) {
         case data_type::real_fp32: return CUDA_R_32F;
         case data_type::real_fp64: return CUDA_R_64F;
         case data_type::complex_fp32: return CUDA_C_32F;
         case data_type::complex_fp64: return CUDA_C_64F;
         default:
-            throw oneapi::mkl::invalid_argument(
+            throw oneapi::math::invalid_argument(
                 "sparse_blas", "get_cuda_value_type",
-                "Invalid data type: " + cast_enum_to_str(onemkl_data_type));
+                "Invalid data type: " + cast_enum_to_str(onemath_data_type));
     }
 }
 
@@ -88,8 +88,8 @@ inline cusparseOrder_t get_cuda_order(layout l) {
         case layout::row_major: return CUSPARSE_ORDER_ROW;
         case layout::col_major: return CUSPARSE_ORDER_COL;
         default:
-            throw oneapi::mkl::invalid_argument("sparse_blas", "get_cuda_order",
-                                                "Unknown layout: " + cast_enum_to_str(l));
+            throw oneapi::math::invalid_argument("sparse_blas", "get_cuda_order",
+                                                 "Unknown layout: " + cast_enum_to_str(l));
     }
 }
 
@@ -98,12 +98,12 @@ inline cusparseIndexBase_t get_cuda_index_base(index_base index) {
         case index_base::zero: return CUSPARSE_INDEX_BASE_ZERO;
         case index_base::one: return CUSPARSE_INDEX_BASE_ONE;
         default:
-            throw oneapi::mkl::invalid_argument("sparse_blas", "get_cuda_index_base",
-                                                "Unknown index_base: " + cast_enum_to_str(index));
+            throw oneapi::math::invalid_argument("sparse_blas", "get_cuda_index_base",
+                                                 "Unknown index_base: " + cast_enum_to_str(index));
     }
 }
 
-/// Return the CUDA transpose operation from a oneMKL type.
+/// Return the CUDA transpose operation from a oneMath type.
 /// Do not conjugate for real types to avoid an invalid argument.
 inline cusparseOperation_t get_cuda_operation(data_type type, transpose op) {
     switch (op) {
@@ -114,7 +114,7 @@ inline cusparseOperation_t get_cuda_operation(data_type type, transpose op) {
                        ? CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE
                        : CUSPARSE_OPERATION_TRANSPOSE;
         default:
-            throw oneapi::mkl::invalid_argument(
+            throw oneapi::math::invalid_argument(
                 "sparse_blas", "get_cuda_operation",
                 "Unknown transpose operation: " + cast_enum_to_str(op));
     }
@@ -125,8 +125,8 @@ inline auto get_cuda_uplo(uplo uplo_val) {
         case uplo::upper: return CUSPARSE_FILL_MODE_UPPER;
         case uplo::lower: return CUSPARSE_FILL_MODE_LOWER;
         default:
-            throw oneapi::mkl::invalid_argument("sparse_blas", "get_cuda_uplo",
-                                                "Unknown uplo: " + cast_enum_to_str(uplo_val));
+            throw oneapi::math::invalid_argument("sparse_blas", "get_cuda_uplo",
+                                                 "Unknown uplo: " + cast_enum_to_str(uplo_val));
     }
 }
 
@@ -135,13 +135,13 @@ inline auto get_cuda_diag(diag diag_val) {
         case diag::nonunit: return CUSPARSE_DIAG_TYPE_NON_UNIT;
         case diag::unit: return CUSPARSE_DIAG_TYPE_UNIT;
         default:
-            throw oneapi::mkl::invalid_argument("sparse_blas", "get_cuda_diag",
-                                                "Unknown diag: " + cast_enum_to_str(diag_val));
+            throw oneapi::math::invalid_argument("sparse_blas", "get_cuda_diag",
+                                                 "Unknown diag: " + cast_enum_to_str(diag_val));
     }
 }
 
 inline void set_matrix_attributes(const std::string& func_name, cusparseSpMatDescr_t cu_a,
-                                  oneapi::mkl::sparse::matrix_view A_view) {
+                                  oneapi::math::sparse::matrix_view A_view) {
     auto cu_fill_mode = get_cuda_uplo(A_view.uplo_view);
     auto status = cusparseSpMatSetAttribute(cu_a, CUSPARSE_SPMAT_FILL_MODE, &cu_fill_mode,
                                             sizeof(cu_fill_mode));
@@ -161,6 +161,6 @@ inline void set_pointer_mode(cusparseHandle_t cu_handle, bool is_ptr_host_access
                                                              : CUSPARSE_POINTER_MODE_DEVICE);
 }
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
 
-#endif //_ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
+#endif //_ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_HELPER_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_scope_handle.cpp b/src/sparse_blas/backends/cusparse/cusparse_scope_handle.cpp
index 4d92daf35..cc485ede1 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_scope_handle.cpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_scope_handle.cpp
@@ -23,7 +23,7 @@
 
 #include "cusparse_scope_handle.hpp"
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
 /**
  * Inserts a new element in the map if its key is unique. This new element
@@ -32,7 +32,7 @@ namespace oneapi::mkl::sparse::cusparse::detail {
  * takes place if no other element in the container has a key equivalent to
  * the one being emplaced (keys in a map container are unique).
  */
-#ifdef ONEAPI_ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEAPI_ONEMATH_PI_INTERFACE_REMOVED
 thread_local cusparse_global_handle<ur_context_handle_t>
     CusparseScopedContextHandler::handle_helper = cusparse_global_handle<ur_context_handle_t>{};
 #else
@@ -92,7 +92,7 @@ std::pair<cusparseHandle_t, CUstream> CusparseScopedContextHandler::get_handle_a
     auto cudaDevice = ih.get_native_device<sycl::backend::ext_oneapi_cuda>();
     CUcontext desired;
     CUDA_ERROR_FUNC(cuDevicePrimaryCtxRetain, &desired, cudaDevice);
-#ifdef ONEAPI_ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEAPI_ONEMATH_PI_INTERFACE_REMOVED
     auto piPlacedContext_ = reinterpret_cast<ur_context_handle_t>(desired);
 #else
     auto piPlacedContext_ = reinterpret_cast<pi_context>(desired);
@@ -144,4 +144,4 @@ sycl::context CusparseScopedContextHandler::get_context(const sycl::queue& queue
     return queue.get_context();
 }
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
diff --git a/src/sparse_blas/backends/cusparse/cusparse_scope_handle.hpp b/src/sparse_blas/backends/cusparse/cusparse_scope_handle.hpp
index 7b8313ee6..df2df2b6a 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_scope_handle.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_scope_handle.hpp
@@ -16,8 +16,8 @@
 *  limitations under the License.
 *
 **************************************************************************/
-#ifndef _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
-#define _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
+#define _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
 
 /**
  * @file Similar to cublas_scope_handle.hpp
@@ -30,8 +30,8 @@
 #endif
 
 // After Plugin Interface removal in DPC++ ur.hpp is the new include
-#if __has_include(<sycl/detail/ur.hpp>) && !defined(ONEAPI_ONEMKL_PI_INTERFACE_REMOVED)
-#define ONEAPI_ONEMKL_PI_INTERFACE_REMOVED
+#if __has_include(<sycl/detail/ur.hpp>) && !defined(ONEAPI_ONEMATH_PI_INTERFACE_REMOVED)
+#define ONEAPI_ONEMATH_PI_INTERFACE_REMOVED
 #endif
 
 #include <thread>
@@ -40,7 +40,7 @@
 #include "cusparse_global_handle.hpp"
 #include "cusparse_helper.hpp"
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
 class CusparseScopedContextHandler {
     CUcontext original_;
@@ -48,7 +48,7 @@ class CusparseScopedContextHandler {
     sycl::interop_handle& ih;
     bool needToRecover_;
 
-#ifdef ONEAPI_ONEMKL_PI_INTERFACE_REMOVED
+#ifdef ONEAPI_ONEMATH_PI_INTERFACE_REMOVED
     static thread_local cusparse_global_handle<ur_context_handle_t> handle_helper;
 #else
     static thread_local cusparse_global_handle<pi_context> handle_helper;
@@ -83,6 +83,6 @@ inline void* get_mem(sycl::interop_handle ih, AccT acc) {
     return reinterpret_cast<void*>(cudaPtr);
 }
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
 
-#endif //_ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
+#endif //_ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_SCOPE_HANDLE_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_task.hpp b/src/sparse_blas/backends/cusparse/cusparse_task.hpp
index 0d86d642d..043cfaaf8 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_task.hpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_task.hpp
@@ -17,15 +17,15 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
-#define _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
+#define _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
 
 #include "cusparse_handles.hpp"
 #include "cusparse_scope_handle.hpp"
 
 /// This file provide a helper function to submit host_task using buffers or USM seamlessly
 
-namespace oneapi::mkl::sparse::cusparse::detail {
+namespace oneapi::math::sparse::cusparse::detail {
 
 template <typename T, typename Container>
 auto get_value_accessor(sycl::handler& cgh, Container container) {
@@ -117,8 +117,8 @@ void submit_native_command_ext(sycl::handler& cgh, sycl::queue& queue, Functor f
             // extension ext_codeplay_enqueue_native_command is used to launch
             // the compute operation which depends on the previous optimize
             // step. In cuSPARSE the optimize step is synchronous but it is
-            // asynchronous in oneMKL Interface. The optimize step may not use
-            // the CUDA stream which would make it impossible for
+            // asynchronous in oneMath. The optimize step may not use the CUDA
+            // stream which would make it impossible for
             // ext_codeplay_enqueue_native_command to automatically ensure it
             // has completed before the compute function starts. These waits are
             // used to ensure the optimize step has completed before starting
@@ -152,16 +152,15 @@ void submit_native_command_ext_with_acc(sycl::handler& cgh, sycl::queue& queue,
         auto unused = std::make_tuple(capture_only_accessors...);
         (void)unused;
         // The functor using ext_codeplay_enqueue_native_command need to
-        // explicitly wait on the events for the SPARSE domain. The
-        // extension ext_codeplay_enqueue_native_command is used to launch
-        // the compute operation which depends on the previous optimize
-        // step. In cuSPARSE the optimize step is synchronous but it is
-        // asynchronous in oneMKL Interface. The optimize step may not use
-        // the CUDA stream which would make it impossible for
-        // ext_codeplay_enqueue_native_command to automatically ensure it
-        // has completed before the compute function starts. These waits are
-        // used to ensure the optimize step has completed before starting
-        // the computation.
+        // explicitly wait on the events for the SPARSE domain. The extension
+        // ext_codeplay_enqueue_native_command is used to launch the compute
+        // operation which depends on the previous optimize step. In cuSPARSE
+        // the optimize step is synchronous but it is asynchronous in oneMath.
+        // The optimize step may not use the CUDA stream which would make it
+        // impossible for ext_codeplay_enqueue_native_command to automatically
+        // ensure it has completed before the compute function starts. These
+        // waits are used to ensure the optimize step has completed before
+        // starting the computation.
         for (auto event : dependencies) {
             event.wait();
         }
@@ -199,7 +198,7 @@ sycl::event dispatch_submit_impl_fp_int(const std::string& function_name, sycl::
         data_type value_type = sm_handle->get_value_type();
         data_type int_type = sm_handle->get_int_type();
 
-#define ONEMKL_CUSPARSE_SUBMIT(FP_TYPE, INT_TYPE)                                                 \
+#define ONEMATH_CUSPARSE_SUBMIT(FP_TYPE, INT_TYPE)                                                \
     return queue.submit([&](sycl::handler& cgh) {                                                 \
         cgh.depends_on(dependencies);                                                             \
         auto fp_accs = get_fp_accessors<FP_TYPE>(cgh, sm_handle, other_containers...);            \
@@ -236,32 +235,32 @@ sycl::event dispatch_submit_impl_fp_int(const std::string& function_name, sycl::
             }                                                                                     \
         }                                                                                         \
     })
-#define ONEMKL_CUSPARSE_SUBMIT_INT(FP_TYPE)            \
-    if (int_type == data_type::int32) {                \
-        ONEMKL_CUSPARSE_SUBMIT(FP_TYPE, std::int32_t); \
-    }                                                  \
-    else if (int_type == data_type::int64) {           \
-        ONEMKL_CUSPARSE_SUBMIT(FP_TYPE, std::int64_t); \
+#define ONEMATH_CUSPARSE_SUBMIT_INT(FP_TYPE)            \
+    if (int_type == data_type::int32) {                 \
+        ONEMATH_CUSPARSE_SUBMIT(FP_TYPE, std::int32_t); \
+    }                                                   \
+    else if (int_type == data_type::int64) {            \
+        ONEMATH_CUSPARSE_SUBMIT(FP_TYPE, std::int64_t); \
     }
 
         if (value_type == data_type::real_fp32) {
-            ONEMKL_CUSPARSE_SUBMIT_INT(float)
+            ONEMATH_CUSPARSE_SUBMIT_INT(float)
         }
         else if (value_type == data_type::real_fp64) {
-            ONEMKL_CUSPARSE_SUBMIT_INT(double)
+            ONEMATH_CUSPARSE_SUBMIT_INT(double)
         }
         else if (value_type == data_type::complex_fp32) {
-            ONEMKL_CUSPARSE_SUBMIT_INT(std::complex<float>)
+            ONEMATH_CUSPARSE_SUBMIT_INT(std::complex<float>)
         }
         else if (value_type == data_type::complex_fp64) {
-            ONEMKL_CUSPARSE_SUBMIT_INT(std::complex<double>)
+            ONEMATH_CUSPARSE_SUBMIT_INT(std::complex<double>)
         }
 
-#undef ONEMKL_CUSPARSE_SUBMIT_INT
-#undef ONEMKL_CUSPARSE_SUBMIT
+#undef ONEMATH_CUSPARSE_SUBMIT_INT
+#undef ONEMATH_CUSPARSE_SUBMIT
 
-        throw oneapi::mkl::exception("sparse_blas", function_name,
-                                     "Could not dispatch buffer kernel to a supported type");
+        throw oneapi::math::exception("sparse_blas", function_name,
+                                      "Could not dispatch buffer kernel to a supported type");
     }
     else {
         // USM submit does not need to capture accessors
@@ -282,8 +281,8 @@ sycl::event dispatch_submit_impl_fp_int(const std::string& function_name, sycl::
             });
         }
         else {
-            throw oneapi::mkl::exception("sparse_blas", function_name,
-                                         "Internal error: Cannot use accessor workspace with USM");
+            throw oneapi::math::exception("sparse_blas", function_name,
+                                          "Internal error: Cannot use accessor workspace with USM");
         }
     }
 }
@@ -296,7 +295,7 @@ sycl::event dispatch_submit_impl_fp(const std::string& function_name, sycl::queu
     if (container_handle->all_use_buffer()) {
         data_type value_type = container_handle->get_value_type();
 
-#define ONEMKL_CUSPARSE_SUBMIT(FP_TYPE)                                  \
+#define ONEMATH_CUSPARSE_SUBMIT(FP_TYPE)                                 \
     return queue.submit([&](sycl::handler& cgh) {                        \
         cgh.depends_on(dependencies);                                    \
         auto fp_accs = get_fp_accessors<FP_TYPE>(cgh, container_handle); \
@@ -304,22 +303,22 @@ sycl::event dispatch_submit_impl_fp(const std::string& function_name, sycl::queu
     })
 
         if (value_type == data_type::real_fp32) {
-            ONEMKL_CUSPARSE_SUBMIT(float);
+            ONEMATH_CUSPARSE_SUBMIT(float);
         }
         else if (value_type == data_type::real_fp64) {
-            ONEMKL_CUSPARSE_SUBMIT(double);
+            ONEMATH_CUSPARSE_SUBMIT(double);
         }
         else if (value_type == data_type::complex_fp32) {
-            ONEMKL_CUSPARSE_SUBMIT(std::complex<float>);
+            ONEMATH_CUSPARSE_SUBMIT(std::complex<float>);
         }
         else if (value_type == data_type::complex_fp64) {
-            ONEMKL_CUSPARSE_SUBMIT(std::complex<double>);
+            ONEMATH_CUSPARSE_SUBMIT(std::complex<double>);
         }
 
-#undef ONEMKL_CUSPARSE_SUBMIT
+#undef ONEMATH_CUSPARSE_SUBMIT
 
-        throw oneapi::mkl::exception("sparse_blas", function_name,
-                                     "Could not dispatch buffer kernel to a supported type");
+        throw oneapi::math::exception("sparse_blas", function_name,
+                                      "Could not dispatch buffer kernel to a supported type");
     }
     else {
         return queue.submit([&](sycl::handler& cgh) {
@@ -426,6 +425,6 @@ inline void synchronize_if_needed(bool is_in_order_queue, CUstream cu_stream) {
 #endif
 }
 
-} // namespace oneapi::mkl::sparse::cusparse::detail
+} // namespace oneapi::math::sparse::cusparse::detail
 
-#endif // _ONEMKL_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_BACKENDS_CUSPARSE_TASKS_HPP_
diff --git a/src/sparse_blas/backends/cusparse/cusparse_wrappers.cpp b/src/sparse_blas/backends/cusparse/cusparse_wrappers.cpp
index 278aec296..43a3994c1 100644
--- a/src/sparse_blas/backends/cusparse/cusparse_wrappers.cpp
+++ b/src/sparse_blas/backends/cusparse/cusparse_wrappers.cpp
@@ -17,16 +17,16 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-#include "oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp"
+#include "oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp"
 
 #include "sparse_blas/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         cusparse
 
-extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = {
+extern "C" sparse_blas_function_table_t onemath_sparse_blas_table = {
     WRAPPER_VERSION,
 #include "sparse_blas/backends/backend_wrappers.cxx"
 };
diff --git a/src/sparse_blas/backends/cusparse/operations/cusparse_spmm.cpp b/src/sparse_blas/backends/cusparse/operations/cusparse_spmm.cpp
index 5fd24d3f4..e4131bec6 100644
--- a/src/sparse_blas/backends/cusparse/operations/cusparse_spmm.cpp
+++ b/src/sparse_blas/backends/cusparse/operations/cusparse_spmm.cpp
@@ -17,7 +17,7 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp"
+#include "oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp"
 
 #include "sparse_blas/backends/cusparse/cusparse_error.hpp"
 #include "sparse_blas/backends/cusparse/cusparse_helper.hpp"
@@ -28,7 +28,7 @@
 #include "sparse_blas/matrix_view_comparison.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Complete the definition of the incomplete type
 struct spmm_descr {
@@ -42,8 +42,8 @@ struct spmm_descr {
     std::size_t temp_buffer_size = 0;
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
-    oneapi::mkl::transpose last_optimized_opB;
+    transpose last_optimized_opA;
+    transpose last_optimized_opB;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_matrix_handle_t last_optimized_B_handle;
@@ -51,9 +51,9 @@ struct spmm_descr {
     spmm_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::cusparse {
+namespace oneapi::math::sparse::cusparse {
 
 namespace detail {
 
@@ -70,42 +70,42 @@ inline auto get_cuda_spmm_alg(spmm_alg alg) {
     }
 }
 
-void check_valid_spmm(const std::string& function_name, oneapi::mkl::transpose opA,
-                      oneapi::mkl::transpose opB, matrix_view A_view, matrix_handle_t A_handle,
-                      dense_matrix_handle_t B_handle, dense_matrix_handle_t C_handle,
-                      bool is_alpha_host_accessible, bool is_beta_host_accessible, spmm_alg alg) {
+void check_valid_spmm(const std::string& function_name, transpose opA, transpose opB,
+                      matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                      dense_matrix_handle_t C_handle, bool is_alpha_host_accessible,
+                      bool is_beta_host_accessible, spmm_alg alg) {
     check_valid_spmm_common(function_name, A_view, A_handle, B_handle, C_handle,
                             is_alpha_host_accessible, is_beta_host_accessible);
     check_valid_matrix_properties(function_name, A_handle);
-    if (alg == spmm_alg::csr_alg3 && opA != oneapi::mkl::transpose::nontrans) {
-        throw mkl::unimplemented(
+    if (alg == spmm_alg::csr_alg3 && opA != transpose::nontrans) {
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support spmm with the algorithm `spmm_alg::csr_alg3` if `opA` is not `transpose::nontrans`.");
     }
-    if (alg == spmm_alg::csr_alg3 && opB == oneapi::mkl::transpose::conjtrans) {
-        throw mkl::unimplemented(
+    if (alg == spmm_alg::csr_alg3 && opB == transpose::conjtrans) {
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support spmm with the algorithm `spmm_alg::csr_alg3` if `opB` is `transpose::conjtrans`.");
     }
-    if (alg == spmm_alg::csr_alg3 && opB == oneapi::mkl::transpose::trans &&
+    if (alg == spmm_alg::csr_alg3 && opB == transpose::trans &&
         A_handle->get_value_type() == data_type::real_fp64) {
         // TODO: Remove once the issue is fixed: https://forums.developer.nvidia.com/t/cusparse-spmm-sample-failing-with-misaligned-address/311022
-        throw mkl::unimplemented(
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support spmm with the algorithm `spmm_alg::csr_alg3` if `opB` is `transpose::trans` and the real fp64 precision is used.");
     }
 }
 
-inline void common_spmm_optimize(oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                                 bool is_alpha_host_accessible, matrix_view A_view,
-                                 matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                                 bool is_beta_host_accessible, dense_matrix_handle_t C_handle,
-                                 spmm_alg alg, spmm_descr_t spmm_descr) {
+inline void common_spmm_optimize(transpose opA, transpose opB, bool is_alpha_host_accessible,
+                                 matrix_view A_view, matrix_handle_t A_handle,
+                                 dense_matrix_handle_t B_handle, bool is_beta_host_accessible,
+                                 dense_matrix_handle_t C_handle, spmm_alg alg,
+                                 spmm_descr_t spmm_descr) {
     check_valid_spmm("spmm_optimize", opA, opB, A_view, A_handle, B_handle, C_handle,
                      is_alpha_host_accessible, is_beta_host_accessible, alg);
     if (!spmm_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spmm_optimize",
-                                 "spmm_buffer_size must be called before spmm_optimize.");
+        throw math::uninitialized("sparse_blas", "spmm_optimize",
+                                  "spmm_buffer_size must be called before spmm_optimize.");
     }
     spmm_descr->optimized_called = true;
     spmm_descr->last_optimized_opA = opA;
@@ -117,9 +117,8 @@ inline void common_spmm_optimize(oneapi::mkl::transpose opA, oneapi::mkl::transp
     spmm_descr->last_optimized_alg = alg;
 }
 
-void spmm_optimize_impl(cusparseHandle_t cu_handle, oneapi::mkl::transpose opA,
-                        oneapi::mkl::transpose opB, const void* alpha, matrix_handle_t A_handle,
-                        dense_matrix_handle_t B_handle, const void* beta,
+void spmm_optimize_impl(cusparseHandle_t cu_handle, transpose opA, transpose opB, const void* alpha,
+                        matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void* beta,
                         dense_matrix_handle_t C_handle, spmm_alg alg, void* workspace_ptr,
                         bool is_alpha_host_accessible) {
     auto cu_a = A_handle->backend_handle;
@@ -179,11 +178,10 @@ sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
     return event;
 }
 
-void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                      const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                      dense_matrix_handle_t B_handle, const void* beta,
-                      dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
-                      std::size_t& temp_buffer_size) {
+void spmm_buffer_size(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                      matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                      const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                      spmm_descr_t spmm_descr, std::size_t& temp_buffer_size) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     detail::check_valid_spmm(__func__, opA, opB, A_view, A_handle, B_handle, C_handle,
@@ -212,10 +210,10 @@ void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mk
     spmm_descr->buffer_size_called = true;
 }
 
-void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                   const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                   dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
-                   spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace) {
+void spmm_optimize(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                   matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                   const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                   spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     if (!A_handle->all_use_buffer()) {
@@ -239,12 +237,11 @@ void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::
     detail::dispatch_submit(__func__, queue, functor, A_handle, workspace, B_handle, C_handle);
 }
 
-sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
-                          matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                          const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
-                          spmm_descr_t spmm_descr, void* workspace,
-                          const std::vector<sycl::event>& dependencies) {
+sycl::event spmm_optimize(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                          matrix_view A_view, matrix_handle_t A_handle,
+                          dense_matrix_handle_t B_handle, const void* beta,
+                          dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
+                          void* workspace, const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     if (A_handle->all_use_buffer()) {
@@ -267,11 +264,10 @@ sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
                                    C_handle);
 }
 
-sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                 const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                 dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
-                 spmm_alg alg, spmm_descr_t spmm_descr,
-                 const std::vector<sycl::event>& dependencies) {
+sycl::event spmm(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                 matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                 const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                 spmm_descr_t spmm_descr, const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     detail::check_valid_spmm(__func__, opA, opB, A_view, A_handle, B_handle, C_handle,
@@ -281,8 +277,8 @@ sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::tr
     }
 
     if (!spmm_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spmm_optimize must be called before spmm.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spmm_optimize must be called before spmm.");
     }
     CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize");
     CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize");
@@ -333,4 +329,4 @@ sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::tr
     }
 }
 
-} // namespace oneapi::mkl::sparse::cusparse
+} // namespace oneapi::math::sparse::cusparse
diff --git a/src/sparse_blas/backends/cusparse/operations/cusparse_spmv.cpp b/src/sparse_blas/backends/cusparse/operations/cusparse_spmv.cpp
index 03b848916..2af4a4e98 100644
--- a/src/sparse_blas/backends/cusparse/operations/cusparse_spmv.cpp
+++ b/src/sparse_blas/backends/cusparse/operations/cusparse_spmv.cpp
@@ -17,7 +17,7 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp"
+#include "oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp"
 
 #include "sparse_blas/backends/cusparse/cusparse_error.hpp"
 #include "sparse_blas/backends/cusparse/cusparse_helper.hpp"
@@ -28,7 +28,7 @@
 #include "sparse_blas/matrix_view_comparison.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Complete the definition of the incomplete type
 struct spmv_descr {
@@ -42,7 +42,7 @@ struct spmv_descr {
     std::size_t temp_buffer_size = 0;
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
+    transpose last_optimized_opA;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_vector_handle_t last_optimized_x_handle;
@@ -50,9 +50,9 @@ struct spmv_descr {
     spmv_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::cusparse {
+namespace oneapi::math::sparse::cusparse {
 
 namespace detail {
 
@@ -66,30 +66,29 @@ inline auto get_cuda_spmv_alg(spmv_alg alg) {
     }
 }
 
-void check_valid_spmv(const std::string& function_name, oneapi::mkl::transpose opA,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void check_valid_spmv(const std::string& function_name, transpose opA, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, bool is_alpha_host_accessible,
                       bool is_beta_host_accessible) {
     check_valid_spmv_common(function_name, opA, A_view, A_handle, x_handle, y_handle,
                             is_alpha_host_accessible, is_beta_host_accessible);
     check_valid_matrix_properties(function_name, A_handle);
     if (A_view.type_view != matrix_descr::general) {
-        throw mkl::unimplemented(
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support spmv with a `type_view` other than `matrix_descr::general`.");
     }
 }
 
-inline void common_spmv_optimize(oneapi::mkl::transpose opA, bool is_alpha_host_accessible,
-                                 matrix_view A_view, matrix_handle_t A_handle,
-                                 dense_vector_handle_t x_handle, bool is_beta_host_accessible,
-                                 dense_vector_handle_t y_handle, spmv_alg alg,
-                                 spmv_descr_t spmv_descr) {
+inline void common_spmv_optimize(transpose opA, bool is_alpha_host_accessible, matrix_view A_view,
+                                 matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                 bool is_beta_host_accessible, dense_vector_handle_t y_handle,
+                                 spmv_alg alg, spmv_descr_t spmv_descr) {
     check_valid_spmv("spmv_optimize", opA, A_view, A_handle, x_handle, y_handle,
                      is_alpha_host_accessible, is_beta_host_accessible);
     if (!spmv_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spmv_optimize",
-                                 "spmv_buffer_size must be called before spmv_optimize.");
+        throw math::uninitialized("sparse_blas", "spmv_optimize",
+                                  "spmv_buffer_size must be called before spmv_optimize.");
     }
     spmv_descr->optimized_called = true;
     spmv_descr->last_optimized_opA = opA;
@@ -102,7 +101,7 @@ inline void common_spmv_optimize(oneapi::mkl::transpose opA, bool is_alpha_host_
 
 #if CUSPARSE_VERSION >= 12300
 // cusparseSpMV_preprocess was added in cuSPARSE 12.3.0.142 (CUDA 12.4)
-void spmv_optimize_impl(cusparseHandle_t cu_handle, oneapi::mkl::transpose opA, const void* alpha,
+void spmv_optimize_impl(cusparseHandle_t cu_handle, transpose opA, const void* alpha,
                         matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
                         dense_vector_handle_t y_handle, spmv_alg alg, void* workspace_ptr,
                         bool is_alpha_host_accessible) {
@@ -163,10 +162,10 @@ sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
     return event;
 }
 
-void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                      const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
-                      spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) {
+void spmv_buffer_size(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                      dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
+                      std::size_t& temp_buffer_size) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     detail::check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle,
@@ -195,10 +194,10 @@ void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
     spmv_descr->buffer_size_called = true;
 }
 
-void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                   matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                   const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
-                   spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace) {
+void spmv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                   matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                   dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
+                   sycl::buffer<std::uint8_t, 1> workspace) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     if (!A_handle->all_use_buffer()) {
@@ -238,11 +237,11 @@ void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
 #endif
 }
 
-sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, const void* beta,
-                          dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
-                          void* workspace, const std::vector<sycl::event>& dependencies) {
+sycl::event spmv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
+                          spmv_descr_t spmv_descr, void* workspace,
+                          const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     if (A_handle->all_use_buffer()) {
@@ -269,10 +268,10 @@ sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
 #endif
 }
 
-sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                 matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                 const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
-                 spmv_descr_t spmv_descr, const std::vector<sycl::event>& dependencies) {
+sycl::event spmv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                 matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                 dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
+                 const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     detail::check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle,
@@ -282,8 +281,8 @@ sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
     }
 
     if (!spmv_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spmv_optimize must be called before spmv.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spmv_optimize must be called before spmv.");
     }
     CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize");
     CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize");
@@ -332,4 +331,4 @@ sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
     }
 }
 
-} // namespace oneapi::mkl::sparse::cusparse
+} // namespace oneapi::math::sparse::cusparse
diff --git a/src/sparse_blas/backends/cusparse/operations/cusparse_spsv.cpp b/src/sparse_blas/backends/cusparse/operations/cusparse_spsv.cpp
index 5c49df013..affad658b 100644
--- a/src/sparse_blas/backends/cusparse/operations/cusparse_spsv.cpp
+++ b/src/sparse_blas/backends/cusparse/operations/cusparse_spsv.cpp
@@ -17,7 +17,7 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/cusparse/onemkl_sparse_blas_cusparse.hpp"
+#include "oneapi/math/sparse_blas/detail/cusparse/onemath_sparse_blas_cusparse.hpp"
 
 #include "sparse_blas/backends/cusparse/cusparse_error.hpp"
 #include "sparse_blas/backends/cusparse/cusparse_helper.hpp"
@@ -28,7 +28,7 @@
 #include "sparse_blas/matrix_view_comparison.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Complete the definition of the incomplete type
 struct spsv_descr {
@@ -42,7 +42,7 @@ struct spsv_descr {
     detail::generic_container workspace;
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
+    transpose last_optimized_opA;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_vector_handle_t last_optimized_x_handle;
@@ -50,9 +50,9 @@ struct spsv_descr {
     spsv_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::cusparse {
+namespace oneapi::math::sparse::cusparse {
 
 namespace detail {
 
@@ -68,15 +68,15 @@ void check_valid_spsv(const std::string& function_name, matrix_view A_view,
     check_valid_matrix_properties(function_name, A_handle);
 }
 
-inline void common_spsv_optimize(oneapi::mkl::transpose opA, bool is_alpha_host_accessible,
-                                 matrix_view A_view, matrix_handle_t A_handle,
-                                 dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                                 spsv_alg alg, spsv_descr_t spsv_descr) {
+inline void common_spsv_optimize(transpose opA, bool is_alpha_host_accessible, matrix_view A_view,
+                                 matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                                 dense_vector_handle_t y_handle, spsv_alg alg,
+                                 spsv_descr_t spsv_descr) {
     check_valid_spsv("spsv_optimize", A_view, A_handle, x_handle, y_handle,
                      is_alpha_host_accessible);
     if (!spsv_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spsv_optimize",
-                                 "spsv_buffer_size must be called before spsv_optimize.");
+        throw math::uninitialized("sparse_blas", "spsv_optimize",
+                                  "spsv_buffer_size must be called before spsv_optimize.");
     }
     spsv_descr->optimized_called = true;
     spsv_descr->last_optimized_opA = opA;
@@ -87,7 +87,7 @@ inline void common_spsv_optimize(oneapi::mkl::transpose opA, bool is_alpha_host_
     spsv_descr->last_optimized_alg = alg;
 }
 
-void spsv_optimize_impl(cusparseHandle_t cu_handle, oneapi::mkl::transpose opA, const void* alpha,
+void spsv_optimize_impl(cusparseHandle_t cu_handle, transpose opA, const void* alpha,
                         matrix_view A_view, matrix_handle_t A_handle,
                         dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                         spsv_alg alg, spsv_descr_t spsv_descr, void* workspace_ptr,
@@ -153,8 +153,8 @@ sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
     return event;
 }
 
-void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void spsv_buffer_size(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                       std::size_t& temp_buffer_size) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
@@ -184,8 +184,8 @@ void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
     spsv_descr->buffer_size_called = true;
 }
 
-void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                   matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void spsv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                   matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                    sycl::buffer<std::uint8_t, 1> workspace) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
@@ -220,11 +220,10 @@ void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
     }
 }
 
-sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                          spsv_alg alg, spsv_descr_t spsv_descr, void* workspace,
-                          const std::vector<sycl::event>& dependencies) {
+sycl::event spsv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
+                          void* workspace, const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     if (A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -242,8 +241,8 @@ sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
                                    y_handle);
 }
 
-sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                 matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+sycl::event spsv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                 matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                  const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
@@ -254,8 +253,8 @@ sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
     }
 
     if (!spsv_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spsv_optimize must be called before spsv.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spsv_optimize must be called before spsv.");
     }
     CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize");
     CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize");
@@ -286,4 +285,4 @@ sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
                                               x_handle, y_handle);
 }
 
-} // namespace oneapi::mkl::sparse::cusparse
+} // namespace oneapi::math::sparse::cusparse
diff --git a/src/sparse_blas/backends/mkl_common/mkl_dispatch.hpp b/src/sparse_blas/backends/mkl_common/mkl_dispatch.hpp
index 28c628438..b678a3d1a 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_dispatch.hpp
+++ b/src/sparse_blas/backends/mkl_common/mkl_dispatch.hpp
@@ -17,11 +17,11 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
 
 /// Convert \p value_type to template type argument and use it to call \p op_functor.
-#define DISPATCH_MKL_OPERATION(function_name, value_type, op_functor, ...)                         \
+#define DISPATCH_ONEMATH_OPERATION(function_name, value_type, op_functor, ...)                     \
     switch (value_type) {                                                                          \
         case detail::data_type::real_fp32: return op_functor<float>(__VA_ARGS__);                  \
         case detail::data_type::real_fp64: return op_functor<double>(__VA_ARGS__);                 \
@@ -29,9 +29,9 @@
         case detail::data_type::complex_fp64:                                                      \
             return op_functor<std::complex<double>>(__VA_ARGS__);                                  \
         default:                                                                                   \
-            throw oneapi::mkl::exception(                                                          \
+            throw oneapi::math::exception(                                                         \
                 "sparse_blas", function_name,                                                      \
                 "Internal error: unsupported type " + data_type_to_str(value_type));               \
     }
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_DISPATCH_HPP_
diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx
index 5fa5ea0a4..2fa355e21 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx
+++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx
@@ -17,8 +17,6 @@
 *
 **************************************************************************/
 
-// In this file functions and types using the namespace oneapi::mkl::sparse:: refer to the backend's namespace for better readability.
-
 // Dense vector
 template <typename fpType>
 void init_dense_vector(sycl::queue& /*queue*/, dense_vector_handle_t* p_dvhandle, std::int64_t size,
@@ -59,21 +57,21 @@ sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhan
 template <typename fpType>
 void init_dense_matrix(sycl::queue& /*queue*/, dense_matrix_handle_t* p_dmhandle,
                        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                       oneapi::mkl::layout dense_layout, sycl::buffer<fpType, 1> val) {
+                       layout dense_layout, sycl::buffer<fpType, 1> val) {
     *p_dmhandle = new dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout);
 }
 
 template <typename fpType>
 void init_dense_matrix(sycl::queue& /*queue*/, dense_matrix_handle_t* p_dmhandle,
                        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                       oneapi::mkl::layout dense_layout, fpType* val) {
+                       layout dense_layout, fpType* val) {
     *p_dmhandle = new dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout);
 }
 
 template <typename fpType>
 void set_dense_matrix_data(sycl::queue& /*queue*/, dense_matrix_handle_t dmhandle,
                            std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                           oneapi::mkl::layout dense_layout, sycl::buffer<fpType, 1> val) {
+                           layout dense_layout, sycl::buffer<fpType, 1> val) {
     detail::check_can_reset_value_handle<fpType>(__func__, dmhandle, true);
     dmhandle->num_rows = num_rows;
     dmhandle->num_cols = num_cols;
@@ -85,7 +83,7 @@ void set_dense_matrix_data(sycl::queue& /*queue*/, dense_matrix_handle_t dmhandl
 template <typename fpType>
 void set_dense_matrix_data(sycl::queue& /*queue*/, dense_matrix_handle_t dmhandle,
                            std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,
-                           oneapi::mkl::layout dense_layout, fpType* val) {
+                           layout dense_layout, fpType* val) {
     detail::check_can_reset_value_handle<fpType>(__func__, dmhandle, false);
     dmhandle->num_rows = num_rows;
     dmhandle->num_cols = num_cols;
@@ -103,45 +101,47 @@ sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhan
 
 // COO matrix
 template <typename fpType, typename intType>
-void init_coo_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle,
+void init_coo_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle,
                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                     oneapi::mkl::index_base index, sycl::buffer<intType, 1> row_ind,
+                     index_base index, sycl::buffer<intType, 1> row_ind,
                      sycl::buffer<intType, 1> col_ind, sycl::buffer<fpType, 1> val) {
-    oneapi::mkl::sparse::matrix_handle_t mkl_handle;
-    oneapi::mkl::sparse::init_matrix_handle(&mkl_handle);
-    auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val,
+    oneapi::mkl::sparse::matrix_handle_t onemkl_handle;
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&onemkl_handle));
+    auto internal_smhandle = new detail::sparse_matrix_handle(onemkl_handle, row_ind, col_ind, val,
                                                               detail::sparse_format::COO, num_rows,
                                                               num_cols, nnz, index);
     // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released.
-    oneapi::mkl::sparse::set_coo_data(queue, mkl_handle, static_cast<intType>(num_rows),
-                                      static_cast<intType>(num_cols), static_cast<intType>(nnz),
-                                      index, internal_smhandle->row_container.get_buffer<intType>(),
-                                      internal_smhandle->col_container.get_buffer<intType>(),
-                                      internal_smhandle->value_container.get_buffer<fpType>());
-    *p_smhandle = reinterpret_cast<oneapi::mkl::sparse::matrix_handle_t>(internal_smhandle);
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data(
+        queue, onemkl_handle, static_cast<intType>(num_rows), static_cast<intType>(num_cols),
+        static_cast<intType>(nnz), detail::get_onemkl_index_base(index),
+        internal_smhandle->row_container.get_buffer<intType>(),
+        internal_smhandle->col_container.get_buffer<intType>(),
+        internal_smhandle->value_container.get_buffer<fpType>()));
+    *p_smhandle = reinterpret_cast<oneapi::math::sparse::matrix_handle_t>(internal_smhandle);
 }
 
 template <typename fpType, typename intType>
-void init_coo_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle,
+void init_coo_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle,
                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                     oneapi::mkl::index_base index, intType* row_ind, intType* col_ind,
-                     fpType* val) {
-    oneapi::mkl::sparse::matrix_handle_t mkl_handle;
-    oneapi::mkl::sparse::init_matrix_handle(&mkl_handle);
-    auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val,
+                     index_base index, intType* row_ind, intType* col_ind, fpType* val) {
+    oneapi::mkl::sparse::matrix_handle_t onemkl_handle;
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&onemkl_handle));
+    auto internal_smhandle = new detail::sparse_matrix_handle(onemkl_handle, row_ind, col_ind, val,
                                                               detail::sparse_format::COO, num_rows,
                                                               num_cols, nnz, index);
-    auto event = oneapi::mkl::sparse::set_coo_data(
-        queue, mkl_handle, static_cast<intType>(num_rows), static_cast<intType>(num_cols),
-        static_cast<intType>(nnz), index, row_ind, col_ind, val);
+    sycl::event event;
+    RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_coo_data(
+                                  queue, onemkl_handle, static_cast<intType>(num_rows),
+                                  static_cast<intType>(num_cols), static_cast<intType>(nnz),
+                                  detail::get_onemkl_index_base(index), row_ind, col_ind, val));
     event.wait_and_throw();
-    *p_smhandle = reinterpret_cast<oneapi::mkl::sparse::matrix_handle_t>(internal_smhandle);
+    *p_smhandle = reinterpret_cast<oneapi::math::sparse::matrix_handle_t>(internal_smhandle);
 }
 
 template <typename fpType, typename intType>
-void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
+void set_coo_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle,
                          std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                         oneapi::mkl::index_base index, sycl::buffer<intType, 1> row_ind,
+                         index_base index, sycl::buffer<intType, 1> row_ind,
                          sycl::buffer<intType, 1> col_ind, sycl::buffer<fpType, 1> val) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, internal_smhandle, true);
@@ -153,19 +153,19 @@ void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_
     internal_smhandle->col_container.set_buffer(col_ind);
     internal_smhandle->value_container.set_buffer(val);
     // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released.
-    oneapi::mkl::sparse::set_coo_data(queue, internal_smhandle->backend_handle,
-                                      static_cast<intType>(num_rows),
-                                      static_cast<intType>(num_cols), static_cast<intType>(nnz),
-                                      index, internal_smhandle->row_container.get_buffer<intType>(),
-                                      internal_smhandle->col_container.get_buffer<intType>(),
-                                      internal_smhandle->value_container.get_buffer<fpType>());
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data(
+        queue, internal_smhandle->backend_handle, static_cast<intType>(num_rows),
+        static_cast<intType>(num_cols), static_cast<intType>(nnz),
+        detail::get_onemkl_index_base(index),
+        internal_smhandle->row_container.get_buffer<intType>(),
+        internal_smhandle->col_container.get_buffer<intType>(),
+        internal_smhandle->value_container.get_buffer<fpType>()));
 }
 
 template <typename fpType, typename intType>
-void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
+void set_coo_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle,
                          std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                         oneapi::mkl::index_base index, intType* row_ind, intType* col_ind,
-                         fpType* val) {
+                         index_base index, intType* row_ind, intType* col_ind, fpType* val) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, internal_smhandle, false);
     internal_smhandle->num_rows = num_rows;
@@ -175,9 +175,12 @@ void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_
     internal_smhandle->row_container.set_usm_ptr(row_ind);
     internal_smhandle->col_container.set_usm_ptr(col_ind);
     internal_smhandle->value_container.set_usm_ptr(val);
-    auto event = oneapi::mkl::sparse::set_coo_data(
-        queue, internal_smhandle->backend_handle, static_cast<intType>(num_rows),
-        static_cast<intType>(num_cols), static_cast<intType>(nnz), index, row_ind, col_ind, val);
+    sycl::event event;
+    RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_coo_data(
+                                  queue, internal_smhandle->backend_handle,
+                                  static_cast<intType>(num_rows), static_cast<intType>(num_cols),
+                                  static_cast<intType>(nnz), detail::get_onemkl_index_base(index),
+                                  row_ind, col_ind, val));
     event.wait_and_throw();
 }
 
@@ -185,47 +188,49 @@ FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS);
 
 // CSR matrix
 template <typename fpType, typename intType>
-void init_csr_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle,
+void init_csr_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle,
                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                     oneapi::mkl::index_base index, sycl::buffer<intType, 1> row_ptr,
+                     index_base index, sycl::buffer<intType, 1> row_ptr,
                      sycl::buffer<intType, 1> col_ind, sycl::buffer<fpType, 1> val) {
-    oneapi::mkl::sparse::matrix_handle_t mkl_handle;
-    oneapi::mkl::sparse::init_matrix_handle(&mkl_handle);
-    auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val,
+    oneapi::mkl::sparse::matrix_handle_t onemkl_handle;
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&onemkl_handle));
+    auto internal_smhandle = new detail::sparse_matrix_handle(onemkl_handle, row_ptr, col_ind, val,
                                                               detail::sparse_format::CSR, num_rows,
                                                               num_cols, nnz, index);
     // The backend deduces nnz from row_ptr.
     // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released.
-    oneapi::mkl::sparse::set_csr_data(queue, mkl_handle, static_cast<intType>(num_rows),
-                                      static_cast<intType>(num_cols), index,
-                                      internal_smhandle->row_container.get_buffer<intType>(),
-                                      internal_smhandle->col_container.get_buffer<intType>(),
-                                      internal_smhandle->value_container.get_buffer<fpType>());
-    *p_smhandle = reinterpret_cast<oneapi::mkl::sparse::matrix_handle_t>(internal_smhandle);
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data(
+        queue, onemkl_handle, static_cast<intType>(num_rows), static_cast<intType>(num_cols),
+        detail::get_onemkl_index_base(index),
+        internal_smhandle->row_container.get_buffer<intType>(),
+        internal_smhandle->col_container.get_buffer<intType>(),
+        internal_smhandle->value_container.get_buffer<fpType>()));
+    *p_smhandle = reinterpret_cast<oneapi::math::sparse::matrix_handle_t>(internal_smhandle);
 }
 
 template <typename fpType, typename intType>
-void init_csr_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle,
+void init_csr_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle,
                      std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                     oneapi::mkl::index_base index, intType* row_ptr, intType* col_ind,
-                     fpType* val) {
-    oneapi::mkl::sparse::matrix_handle_t mkl_handle;
-    oneapi::mkl::sparse::init_matrix_handle(&mkl_handle);
-    auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val,
+                     index_base index, intType* row_ptr, intType* col_ind, fpType* val) {
+    oneapi::mkl::sparse::matrix_handle_t onemkl_handle;
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&onemkl_handle));
+    auto internal_smhandle = new detail::sparse_matrix_handle(onemkl_handle, row_ptr, col_ind, val,
                                                               detail::sparse_format::CSR, num_rows,
                                                               num_cols, nnz, index);
     // The backend deduces nnz from row_ptr.
-    auto event = oneapi::mkl::sparse::set_csr_data(
-        queue, mkl_handle, static_cast<intType>(num_rows), static_cast<intType>(num_cols), index,
-        row_ptr, col_ind, val);
+    sycl::event event;
+    RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_csr_data(
+                                  queue, onemkl_handle, static_cast<intType>(num_rows),
+                                  static_cast<intType>(num_cols),
+                                  detail::get_onemkl_index_base(index), row_ptr, col_ind, val));
     event.wait_and_throw();
-    *p_smhandle = reinterpret_cast<oneapi::mkl::sparse::matrix_handle_t>(internal_smhandle);
+    *p_smhandle = reinterpret_cast<oneapi::math::sparse::matrix_handle_t>(internal_smhandle);
 }
 
 template <typename fpType, typename intType>
-void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
+void set_csr_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle,
                          std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                         oneapi::mkl::index_base index, sycl::buffer<intType, 1> row_ptr,
+                         index_base index, sycl::buffer<intType, 1> row_ptr,
                          sycl::buffer<intType, 1> col_ind, sycl::buffer<fpType, 1> val) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, internal_smhandle, true);
@@ -238,19 +243,18 @@ void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_
     internal_smhandle->value_container.set_buffer(val);
     // The backend deduces nnz from row_ptr.
     // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released.
-    oneapi::mkl::sparse::set_csr_data(queue, internal_smhandle->backend_handle,
-                                      static_cast<intType>(num_rows),
-                                      static_cast<intType>(num_cols), index,
-                                      internal_smhandle->row_container.get_buffer<intType>(),
-                                      internal_smhandle->col_container.get_buffer<intType>(),
-                                      internal_smhandle->value_container.get_buffer<fpType>());
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data(
+        queue, internal_smhandle->backend_handle, static_cast<intType>(num_rows),
+        static_cast<intType>(num_cols), detail::get_onemkl_index_base(index),
+        internal_smhandle->row_container.get_buffer<intType>(),
+        internal_smhandle->col_container.get_buffer<intType>(),
+        internal_smhandle->value_container.get_buffer<fpType>()));
 }
 
 template <typename fpType, typename intType>
-void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
+void set_csr_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle,
                          std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                         oneapi::mkl::index_base index, intType* row_ptr, intType* col_ind,
-                         fpType* val) {
+                         index_base index, intType* row_ptr, intType* col_ind, fpType* val) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     detail::check_can_reset_sparse_handle<fpType, intType>(__func__, internal_smhandle, false);
     internal_smhandle->num_rows = num_rows;
@@ -261,39 +265,43 @@ void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_
     internal_smhandle->col_container.set_usm_ptr(col_ind);
     internal_smhandle->value_container.set_usm_ptr(val);
     // The backend deduces nnz from row_ptr.
-    auto event = oneapi::mkl::sparse::set_csr_data(
-        queue, internal_smhandle->backend_handle, static_cast<intType>(num_rows),
-        static_cast<intType>(num_cols), index, row_ptr, col_ind, val);
+    sycl::event event;
+    RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_csr_data(
+                                  queue, internal_smhandle->backend_handle,
+                                  static_cast<intType>(num_rows), static_cast<intType>(num_cols),
+                                  detail::get_onemkl_index_base(index), row_ptr, col_ind, val));
     event.wait_and_throw();
 }
 
 FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS);
 
 // Common sparse matrix functions
-sycl::event release_sparse_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
+sycl::event release_sparse_matrix(sycl::queue& queue,
+                                  oneapi::math::sparse::matrix_handle_t smhandle,
                                   const std::vector<sycl::event>& dependencies) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     // Asynchronously release the backend's handle followed by the internal handle.
-    auto event = oneapi::mkl::sparse::release_matrix_handle(
-        queue, &internal_smhandle->backend_handle, dependencies);
+    sycl::event event;
+    RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::release_matrix_handle(
+                                  queue, &internal_smhandle->backend_handle, dependencies));
     return detail::submit_release(queue, internal_smhandle, { event });
 }
 
-bool set_matrix_property(sycl::queue& /*queue*/, oneapi::mkl::sparse::matrix_handle_t smhandle,
+bool set_matrix_property(sycl::queue& /*queue*/, oneapi::math::sparse::matrix_handle_t smhandle,
                          matrix_property property) {
     auto internal_smhandle = detail::get_internal_handle(smhandle);
     // Store the matrix property internally for better error checking
     internal_smhandle->set_matrix_property(property);
     // Set the matrix property on the backend handle
-    // Backend and oneMKL interface types for the property don't match
+    // Backend and oneMath types for the property don't match
     switch (property) {
         case matrix_property::symmetric:
-            oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle,
-                                                     oneapi::mkl::sparse::property::symmetric);
+            RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property(
+                internal_smhandle->backend_handle, oneapi::mkl::sparse::property::symmetric));
             return true;
         case matrix_property::sorted:
-            oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle,
-                                                     oneapi::mkl::sparse::property::sorted);
+            RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property(
+                internal_smhandle->backend_handle, oneapi::mkl::sparse::property::sorted));
             return true;
         default: return false;
     }
diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp
index 1bce0b8fb..b046668f6 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp
+++ b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp
@@ -17,22 +17,20 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
 
-// MKLCPU and MKLGPU backends include
-// This include defines its own oneapi::mkl::sparse namespace with some of the
-// types that are used here: matrix_handle_t, index_base, transpose, uplo, diag.
-#include <oneapi/mkl/spblas.hpp>
+// Intel(R) oneMKL header
+#include <mkl/spblas.hpp>
 
 #include "sparse_blas/generic_container.hpp"
 #include "sparse_blas/macros.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 // Complete the definition of incomplete types dense_vector_handle and
-// dense_matrix_handle as they don't exist in oneMKL backends yet.
+// dense_matrix_handle as they don't exist in oneMath backends yet.
 
 struct dense_vector_handle : public detail::generic_dense_vector_handle<void*> {
     template <typename T>
@@ -58,25 +56,27 @@ struct dense_matrix_handle : public detail::generic_dense_matrix_handle<void*> {
                                                          ld, dense_layout) {}
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::detail {
+namespace oneapi::math::sparse::detail {
+
+using namespace oneapi::math::detail;
 
 /**
  * Internal sparse_matrix_handle type for MKLCPU and MKLGPU backends.
  * Here \p matrix_handle_t is the type of the backend's handle.
  * The user-facing incomplete type matrix_handle_t must be kept incomplete.
  * Internally matrix_handle_t is reinterpret_cast as
- * oneapi::mkl::sparse::detail::sparse_matrix_handle which holds another
+ * oneapi::math::sparse::detail::sparse_matrix_handle which holds another
  * matrix_handle_t for the backend handle.
  */
-using sparse_matrix_handle = detail::generic_sparse_handle<matrix_handle_t>;
+using sparse_matrix_handle = detail::generic_sparse_handle<oneapi::mkl::sparse::matrix_handle_t>;
 
-/// Cast to oneMKL's interface handle type
-inline auto get_internal_handle(matrix_handle_t handle) {
+/// Cast to oneMath's internal handle type
+inline auto get_internal_handle(oneapi::math::sparse::matrix_handle_t handle) {
     return reinterpret_cast<sparse_matrix_handle*>(handle);
 }
 
-} // namespace oneapi::mkl::sparse::detail
+} // namespace oneapi::math::sparse::detail
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_
diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx
index 9c0bc577b..5490393b9 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx
+++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx
@@ -17,15 +17,13 @@
 *
 **************************************************************************/
 
-// In this file functions and types using the namespace oneapi::mkl::sparse:: refer to the backend's namespace for better readability.
-
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 struct spmm_descr {
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
-    oneapi::mkl::transpose last_optimized_opB;
+    transpose last_optimized_opA;
+    transpose last_optimized_opB;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_matrix_handle_t last_optimized_B_handle;
@@ -33,9 +31,9 @@ struct spmm_descr {
     spmm_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::BACKEND {
+namespace oneapi::math::sparse::BACKEND {
 
 void init_spmm_descr(sycl::queue& /*queue*/, spmm_descr_t* p_spmm_descr) {
     *p_spmm_descr = new spmm_descr();
@@ -46,8 +44,8 @@ sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
     return detail::submit_release(queue, spmm_descr, dependencies);
 }
 
-void check_valid_spmm(const std::string& function_name, oneapi::mkl::transpose opA,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+void check_valid_spmm(const std::string& function_name, transpose opA, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
                       dense_matrix_handle_t C_handle, bool is_alpha_host_accessible,
                       bool is_beta_host_accessible) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
@@ -58,9 +56,9 @@ void check_valid_spmm(const std::string& function_name, oneapi::mkl::transpose o
     detail::data_type data_type = internal_A_handle->get_value_type();
     if ((data_type == detail::data_type::complex_fp32 ||
          data_type == detail::data_type::complex_fp64) &&
-        opA == oneapi::mkl::transpose::conjtrans &&
+        opA == transpose::conjtrans &&
         internal_A_handle->has_matrix_property(matrix_property::symmetric)) {
-        throw mkl::unimplemented(
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support spmm using conjtrans and the symmetric property.");
     }
@@ -69,12 +67,11 @@ void check_valid_spmm(const std::string& function_name, oneapi::mkl::transpose o
 #endif // BACKEND
 }
 
-void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA,
-                      oneapi::mkl::transpose /*opB*/, const void* alpha, matrix_view A_view,
-                      matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void* beta,
-                      dense_matrix_handle_t C_handle, spmm_alg /*alg*/, spmm_descr_t spmm_descr,
-                      std::size_t& temp_buffer_size) {
-    // TODO: Add support for external workspace once the close-source oneMKL backend supports it.
+void spmm_buffer_size(sycl::queue& queue, transpose opA, transpose /*opB*/, const void* alpha,
+                      matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                      const void* beta, dense_matrix_handle_t C_handle, spmm_alg /*alg*/,
+                      spmm_descr_t spmm_descr, std::size_t& temp_buffer_size) {
+    // TODO: Add support for external workspace once the Intel(R) oneMKL backends support it.
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible,
@@ -83,18 +80,18 @@ void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA,
     spmm_descr->buffer_size_called = true;
 }
 
-inline void common_spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                 oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
-                                 matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                                 const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+inline void common_spmm_optimize(sycl::queue& queue, transpose opA, transpose opB,
+                                 const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
+                                 dense_matrix_handle_t B_handle, const void* beta,
+                                 dense_matrix_handle_t C_handle, spmm_alg alg,
                                  spmm_descr_t spmm_descr) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     check_valid_spmm("spmm_optimize", opA, A_view, A_handle, B_handle, C_handle,
                      is_alpha_host_accessible, is_beta_host_accessible);
     if (!spmm_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spmm_optimize",
-                                 "spmm_buffer_size must be called before spmm_optimize.");
+        throw math::uninitialized("sparse_blas", "spmm_optimize",
+                                  "spmm_buffer_size must be called before spmm_optimize.");
     }
     spmm_descr->optimized_called = true;
     spmm_descr->last_optimized_opA = opA;
@@ -106,11 +103,10 @@ inline void common_spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
     spmm_descr->last_optimized_alg = alg;
 }
 
-void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                   const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                   dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
-                   spmm_alg alg, spmm_descr_t spmm_descr,
-                   sycl::buffer<std::uint8_t, 1> /*workspace*/) {
+void spmm_optimize(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                   matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                   const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                   spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> /*workspace*/) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     if (!internal_A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -121,15 +117,14 @@ void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::
         return;
     }
     internal_A_handle->can_be_reset = false;
-    // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it.
+    // TODO: Add support for spmm_optimize once the Intel(R) oneMKL backends support it.
 }
 
-sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
-                          matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                          const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
-                          spmm_descr_t spmm_descr, void* /*workspace*/,
-                          const std::vector<sycl::event>& dependencies) {
+sycl::event spmm_optimize(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                          matrix_view A_view, matrix_handle_t A_handle,
+                          dense_matrix_handle_t B_handle, const void* beta,
+                          dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
+                          void* /*workspace*/, const std::vector<sycl::event>& dependencies) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     if (internal_A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -140,15 +135,15 @@ sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
         return detail::collapse_dependencies(queue, dependencies);
     }
     internal_A_handle->can_be_reset = false;
-    // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it.
+    // TODO: Add support for spmm_optimize once the Intel(R) oneMKL backends support it.
     return detail::collapse_dependencies(queue, dependencies);
 }
 
 template <typename T>
-sycl::event internal_spmm(sycl::queue& queue, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view /*A_view*/,
-                          matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
-                          const void* beta, dense_matrix_handle_t C_handle, spmm_alg /*alg*/,
+sycl::event internal_spmm(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                          matrix_view /*A_view*/, matrix_handle_t A_handle,
+                          dense_matrix_handle_t B_handle, const void* beta,
+                          dense_matrix_handle_t C_handle, spmm_alg /*alg*/,
                           spmm_descr_t /*spmm_descr*/, const std::vector<sycl::event>& dependencies,
                           bool is_alpha_host_accessible, bool is_beta_host_accessible) {
     T host_alpha =
@@ -157,38 +152,40 @@ sycl::event internal_spmm(sycl::queue& queue, oneapi::mkl::transpose opA,
         detail::get_scalar_on_host(queue, static_cast<const T*>(beta), is_beta_host_accessible);
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     internal_A_handle->can_be_reset = false;
-    auto layout = B_handle->dense_layout;
+    auto onemkl_layout = detail::get_onemkl_layout(B_handle->dense_layout);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_opb = detail::get_onemkl_transpose(opB);
     auto columns = C_handle->num_cols;
     auto ldb = B_handle->ld;
     auto ldc = C_handle->ld;
     if (internal_A_handle->all_use_buffer()) {
-        oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha,
-                                  internal_A_handle->backend_handle, B_handle->get_buffer<T>(),
-                                  columns, ldb, host_beta, C_handle->get_buffer<T>(), ldc);
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::sparse::gemm(queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha,
+                                      internal_A_handle->backend_handle, B_handle->get_buffer<T>(),
+                                      columns, ldb, host_beta, C_handle->get_buffer<T>(), ldc));
         // Dependencies are not used for buffers
         return {};
     }
     else {
-        return oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha,
-                                         internal_A_handle->backend_handle,
-                                         B_handle->get_usm_ptr<T>(), columns, ldb, host_beta,
-                                         C_handle->get_usm_ptr<T>(), ldc, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::gemm(
+            queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha,
+            internal_A_handle->backend_handle, B_handle->get_usm_ptr<T>(), columns, ldb, host_beta,
+            C_handle->get_usm_ptr<T>(), ldc, dependencies));
     }
 }
 
-sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                 const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
-                 dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
-                 spmm_alg alg, spmm_descr_t spmm_descr,
-                 const std::vector<sycl::event>& dependencies) {
+sycl::event spmm(sycl::queue& queue, transpose opA, transpose opB, const void* alpha,
+                 matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
+                 const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
+                 spmm_descr_t spmm_descr, const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible,
                      is_beta_host_accessible);
 
     if (!spmm_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spmm_optimize must be called before spmm.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spmm_optimize must be called before spmm.");
     }
     CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize");
     CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize");
@@ -199,9 +196,9 @@ sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::tr
     CHECK_DESCR_MATCH(spmm_descr, alg, "spmm_optimize");
 
     auto value_type = detail::get_internal_handle(A_handle)->get_value_type();
-    DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view,
-                           A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies,
-                           is_alpha_host_accessible, is_beta_host_accessible);
+    DISPATCH_ONEMATH_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view,
+                               A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies,
+                               is_alpha_host_accessible, is_beta_host_accessible);
 }
 
-} // namespace oneapi::mkl::sparse::BACKEND
+} // namespace oneapi::math::sparse::BACKEND
diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx
index 9fc43d8e9..3beea88aa 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx
+++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx
@@ -17,14 +17,12 @@
 *
 **************************************************************************/
 
-// In this file functions and types using the namespace oneapi::mkl::sparse:: refer to the backend's namespace for better readability.
-
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 struct spmv_descr {
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
+    transpose last_optimized_opA;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_vector_handle_t last_optimized_x_handle;
@@ -32,9 +30,9 @@ struct spmv_descr {
     spmv_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::BACKEND {
+namespace oneapi::math::sparse::BACKEND {
 
 void init_spmv_descr(sycl::queue& /*queue*/, spmv_descr_t* p_spmv_descr) {
     *p_spmv_descr = new spmv_descr();
@@ -45,8 +43,8 @@ sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
     return detail::submit_release(queue, spmv_descr, dependencies);
 }
 
-void check_valid_spmv(const std::string& function_name, oneapi::mkl::transpose opA,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void check_valid_spmv(const std::string& function_name, transpose opA, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, bool is_alpha_host_accessible,
                       bool is_beta_host_accessible) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
@@ -55,18 +53,18 @@ void check_valid_spmv(const std::string& function_name, oneapi::mkl::transpose o
 
     if ((A_view.type_view == matrix_descr::symmetric ||
          A_view.type_view == matrix_descr::hermitian) &&
-        opA == oneapi::mkl::transpose::conjtrans) {
-        throw mkl::unimplemented(
+        opA == transpose::conjtrans) {
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support Symmetric or Hermitian matrix with `conjtrans`.");
     }
 }
 
-void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                      const void* beta, dense_vector_handle_t y_handle, spmv_alg /*alg*/,
-                      spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) {
-    // TODO: Add support for external workspace once the close-source oneMKL backend supports it.
+void spmv_buffer_size(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                      dense_vector_handle_t y_handle, spmv_alg /*alg*/, spmv_descr_t spmv_descr,
+                      std::size_t& temp_buffer_size) {
+    // TODO: Add support for external workspace once the Intel(R) oneMKL backends support it.
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible,
@@ -75,7 +73,7 @@ void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
     spmv_descr->buffer_size_called = true;
 }
 
-inline void common_spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+inline void common_spmv_optimize(sycl::queue& queue, transpose opA, const void* alpha,
                                  matrix_view A_view, matrix_handle_t A_handle,
                                  dense_vector_handle_t x_handle, const void* beta,
                                  dense_vector_handle_t y_handle, spmv_alg alg,
@@ -85,8 +83,8 @@ inline void common_spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
     check_valid_spmv("spmv_optimize", opA, A_view, A_handle, x_handle, y_handle,
                      is_alpha_host_accessible, is_beta_host_accessible);
     if (!spmv_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spmv_optimize",
-                                 "spmv_buffer_size must be called before spmv_optimize.");
+        throw math::uninitialized("sparse_blas", "spmv_optimize",
+                                  "spmv_buffer_size must be called before spmv_optimize.");
     }
     spmv_descr->optimized_called = true;
     spmv_descr->last_optimized_opA = opA;
@@ -97,10 +95,10 @@ inline void common_spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
     spmv_descr->last_optimized_alg = alg;
 }
 
-void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                   matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                   const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
-                   spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> /*workspace*/) {
+void spmv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                   matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                   dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
+                   sycl::buffer<std::uint8_t, 1> /*workspace*/) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     if (!internal_A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -111,9 +109,12 @@ void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
         return;
     }
     internal_A_handle->can_be_reset = false;
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
     if (A_view.type_view == matrix_descr::triangular) {
-        oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view,
-                                           internal_A_handle->backend_handle);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trmv(
+            queue, onemkl_uplo, onemkl_opa, onemkl_diag, internal_A_handle->backend_handle));
     }
     else if (A_view.type_view == matrix_descr::symmetric ||
              A_view.type_view == matrix_descr::hermitian) {
@@ -121,15 +122,16 @@ void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
         return;
     }
     else {
-        oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle);
+        RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_gemv(
+            queue, onemkl_opa, internal_A_handle->backend_handle));
     }
 }
 
-sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, const void* beta,
-                          dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
-                          void* /*workspace*/, const std::vector<sycl::event>& dependencies) {
+sycl::event spmv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
+                          spmv_descr_t spmv_descr, void* /*workspace*/,
+                          const std::vector<sycl::event>& dependencies) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     if (internal_A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -140,25 +142,28 @@ sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
         return detail::collapse_dependencies(queue, dependencies);
     }
     internal_A_handle->can_be_reset = false;
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
     if (A_view.type_view == matrix_descr::triangular) {
-        return oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view,
-                                                  internal_A_handle->backend_handle, dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(
+            oneapi::mkl::sparse::optimize_trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag,
+                                               internal_A_handle->backend_handle, dependencies));
     }
     else if (A_view.type_view == matrix_descr::symmetric ||
              A_view.type_view == matrix_descr::hermitian) {
         return detail::collapse_dependencies(queue, dependencies);
     }
     else {
-        return oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle,
-                                                  dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::optimize_gemv(
+            queue, onemkl_opa, internal_A_handle->backend_handle, dependencies));
     }
 }
 
 template <typename T>
-sycl::event internal_spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, const void* beta,
-                          dense_vector_handle_t y_handle, spmv_alg /*alg*/,
+sycl::event internal_spmv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          const void* beta, dense_vector_handle_t y_handle, spmv_alg /*alg*/,
                           spmv_descr_t /*spmv_descr*/, const std::vector<sycl::event>& dependencies,
                           bool is_alpha_host_accessible, bool is_beta_host_accessible) {
     T host_alpha =
@@ -168,21 +173,25 @@ sycl::event internal_spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     internal_A_handle->can_be_reset = false;
     auto backend_handle = internal_A_handle->backend_handle;
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
     if (internal_A_handle->all_use_buffer()) {
         auto x_buffer = x_handle->get_buffer<T>();
         auto y_buffer = y_handle->get_buffer<T>();
         if (A_view.type_view == matrix_descr::triangular) {
-            oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha,
-                                      backend_handle, x_buffer, host_beta, y_buffer);
+            RETHROW_ONEMKL_EXCEPTIONS(
+                oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha,
+                                          backend_handle, x_buffer, host_beta, y_buffer));
         }
         else if (A_view.type_view == matrix_descr::symmetric ||
                  A_view.type_view == matrix_descr::hermitian) {
-            oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle, x_buffer,
-                                      host_beta, y_buffer);
+            RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::symv(
+                queue, onemkl_uplo, host_alpha, backend_handle, x_buffer, host_beta, y_buffer));
         }
         else {
-            oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_buffer, host_beta,
-                                      y_buffer);
+            RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::gemv(
+                queue, onemkl_opa, host_alpha, backend_handle, x_buffer, host_beta, y_buffer));
         }
         // Dependencies are not used for buffers
         return {};
@@ -191,34 +200,36 @@ sycl::event internal_spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const
         auto x_usm = x_handle->get_usm_ptr<T>();
         auto y_usm = y_handle->get_usm_ptr<T>();
         if (A_view.type_view == matrix_descr::triangular) {
-            return oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view,
-                                             host_alpha, backend_handle, x_usm, host_beta, y_usm,
-                                             dependencies);
+            RETHROW_ONEMKL_EXCEPTIONS_RET(
+                oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha,
+                                          backend_handle, x_usm, host_beta, y_usm, dependencies));
         }
         else if (A_view.type_view == matrix_descr::symmetric ||
                  A_view.type_view == matrix_descr::hermitian) {
-            return oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle,
-                                             x_usm, host_beta, y_usm, dependencies);
+            RETHROW_ONEMKL_EXCEPTIONS_RET(
+                oneapi::mkl::sparse::symv(queue, onemkl_uplo, host_alpha, backend_handle, x_usm,
+                                          host_beta, y_usm, dependencies));
         }
         else {
-            return oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_usm,
-                                             host_beta, y_usm, dependencies);
+            RETHROW_ONEMKL_EXCEPTIONS_RET(
+                oneapi::mkl::sparse::gemv(queue, onemkl_opa, host_alpha, backend_handle, x_usm,
+                                          host_beta, y_usm, dependencies));
         }
     }
 }
 
-sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                 matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
-                 const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
-                 spmv_descr_t spmv_descr, const std::vector<sycl::event>& dependencies) {
+sycl::event spmv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                 matrix_handle_t A_handle, dense_vector_handle_t x_handle, const void* beta,
+                 dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
+                 const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta);
     check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible,
                      is_beta_host_accessible);
 
     if (!spmv_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spmv_optimize must be called before spmv.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spmv_optimize must be called before spmv.");
     }
     CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize");
     CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize");
@@ -228,9 +239,9 @@ sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
     CHECK_DESCR_MATCH(spmv_descr, alg, "spmv_optimize");
 
     auto value_type = detail::get_internal_handle(A_handle)->get_value_type();
-    DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle,
-                           x_handle, beta, y_handle, alg, spmv_descr, dependencies,
-                           is_alpha_host_accessible, is_beta_host_accessible);
+    DISPATCH_ONEMATH_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view,
+                               A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies,
+                               is_alpha_host_accessible, is_beta_host_accessible);
 }
 
-} // namespace oneapi::mkl::sparse::BACKEND
+} // namespace oneapi::math::sparse::BACKEND
diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx
index dd2a4f627..7832f90de 100644
--- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx
+++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx
@@ -17,14 +17,12 @@
 *
 **************************************************************************/
 
-// In this file functions and types using the namespace oneapi::mkl::sparse:: refer to the backend's namespace for better readability.
-
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
 struct spsv_descr {
     bool buffer_size_called = false;
     bool optimized_called = false;
-    oneapi::mkl::transpose last_optimized_opA;
+    transpose last_optimized_opA;
     matrix_view last_optimized_A_view;
     matrix_handle_t last_optimized_A_handle;
     dense_vector_handle_t last_optimized_x_handle;
@@ -32,9 +30,9 @@ struct spsv_descr {
     spsv_alg last_optimized_alg;
 };
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
 
-namespace oneapi::mkl::sparse::BACKEND {
+namespace oneapi::math::sparse::BACKEND {
 
 void init_spsv_descr(sycl::queue& /*queue*/, spsv_descr_t* p_spsv_descr) {
     *p_spsv_descr = new spsv_descr();
@@ -45,8 +43,8 @@ sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
     return detail::submit_release(queue, spsv_descr, dependencies);
 }
 
-void check_valid_spsv(const std::string& function_name, oneapi::mkl::transpose opA,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void check_valid_spsv(const std::string& function_name, transpose opA, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, bool is_alpha_host_accessible, spsv_alg alg) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     detail::check_valid_spsv_common(function_name, A_view, internal_A_handle, x_handle, y_handle,
@@ -54,7 +52,7 @@ void check_valid_spsv(const std::string& function_name, oneapi::mkl::transpose o
 
     if (alg == spsv_alg::no_optimize_alg &&
         !internal_A_handle->has_matrix_property(matrix_property::sorted)) {
-        throw mkl::unimplemented(
+        throw math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support `no_optimize_alg` unless A_handle has the property `matrix_property::sorted`.");
     }
@@ -63,20 +61,20 @@ void check_valid_spsv(const std::string& function_name, oneapi::mkl::transpose o
     detail::data_type data_type = internal_A_handle->get_value_type();
     if ((data_type == detail::data_type::complex_fp32 ||
          data_type == detail::data_type::complex_fp64) &&
-        opA == oneapi::mkl::transpose::conjtrans) {
-        throw mkl::unimplemented("sparse_blas", function_name,
-                                 "The backend does not support spsv using conjtrans.");
+        opA == oneapi::math::transpose::conjtrans) {
+        throw math::unimplemented("sparse_blas", function_name,
+                                  "The backend does not support spsv using conjtrans.");
     }
 #else
     (void)opA;
 #endif // BACKEND
 }
 
-void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                      matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void spsv_buffer_size(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                      matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                       std::size_t& temp_buffer_size) {
-    // TODO: Add support for external workspace once the close-source oneMKL backend supports it.
+    // TODO: Add support for external workspace once the Intel(R) oneMKL backends support it.
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
     check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible,
                      alg);
@@ -84,7 +82,7 @@ void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
     spsv_descr->buffer_size_called = true;
 }
 
-inline void common_spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+inline void common_spsv_optimize(sycl::queue& queue, transpose opA, const void* alpha,
                                  matrix_view A_view, matrix_handle_t A_handle,
                                  dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                                  spsv_alg alg, spsv_descr_t spsv_descr) {
@@ -92,8 +90,8 @@ inline void common_spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
     check_valid_spsv("spsv_optimize", opA, A_view, A_handle, x_handle, y_handle,
                      is_alpha_host_accessible, alg);
     if (!spsv_descr->buffer_size_called) {
-        throw mkl::uninitialized("sparse_blas", "spsv_optimize",
-                                 "spsv_buffer_size must be called before spsv_optimize.");
+        throw math::uninitialized("sparse_blas", "spsv_optimize",
+                                  "spsv_buffer_size must be called before spsv_optimize.");
     }
     spsv_descr->optimized_called = true;
     spsv_descr->last_optimized_opA = opA;
@@ -104,8 +102,8 @@ inline void common_spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
     spsv_descr->last_optimized_alg = alg;
 }
 
-void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                   matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+void spsv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                   matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                    sycl::buffer<std::uint8_t, 1> /*workspace*/) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
@@ -117,15 +115,17 @@ void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
         return;
     }
     internal_A_handle->can_be_reset = false;
-    oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view,
-                                       internal_A_handle->backend_handle);
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
+    RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trsv(
+        queue, onemkl_uplo, onemkl_opa, onemkl_diag, internal_A_handle->backend_handle));
 }
 
-sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                          spsv_alg alg, spsv_descr_t spsv_descr, void* /*workspace*/,
-                          const std::vector<sycl::event>& dependencies) {
+sycl::event spsv_optimize(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
+                          void* /*workspace*/, const std::vector<sycl::event>& dependencies) {
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     if (internal_A_handle->all_use_buffer()) {
         detail::throw_incompatible_container(__func__);
@@ -135,38 +135,45 @@ sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
         return detail::collapse_dependencies(queue, dependencies);
     }
     internal_A_handle->can_be_reset = false;
-    return oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view,
-                                              internal_A_handle->backend_handle, dependencies);
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
+    RETHROW_ONEMKL_EXCEPTIONS_RET(
+        oneapi::mkl::sparse::optimize_trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag,
+                                           internal_A_handle->backend_handle, dependencies));
 }
 
 template <typename T>
-sycl::event internal_spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                          matrix_view A_view, matrix_handle_t A_handle,
-                          dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
-                          spsv_alg /*alg*/, spsv_descr_t /*spsv_descr*/,
-                          const std::vector<sycl::event>& dependencies,
+sycl::event internal_spsv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                          matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+                          dense_vector_handle_t y_handle, spsv_alg /*alg*/,
+                          spsv_descr_t /*spsv_descr*/, const std::vector<sycl::event>& dependencies,
                           bool is_alpha_host_accessible) {
     T host_alpha =
         detail::get_scalar_on_host(queue, static_cast<const T*>(alpha), is_alpha_host_accessible);
     auto internal_A_handle = detail::get_internal_handle(A_handle);
     internal_A_handle->can_be_reset = false;
+    auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view);
+    auto onemkl_opa = detail::get_onemkl_transpose(opA);
+    auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view);
     if (internal_A_handle->all_use_buffer()) {
-        oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha,
-                                  internal_A_handle->backend_handle, x_handle->get_buffer<T>(),
-                                  y_handle->get_buffer<T>());
+        RETHROW_ONEMKL_EXCEPTIONS(
+            oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha,
+                                      internal_A_handle->backend_handle, x_handle->get_buffer<T>(),
+                                      y_handle->get_buffer<T>()));
         // Dependencies are not used for buffers
         return {};
     }
     else {
-        return oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha,
-                                         internal_A_handle->backend_handle,
-                                         x_handle->get_usm_ptr<T>(), y_handle->get_usm_ptr<T>(),
-                                         dependencies);
+        RETHROW_ONEMKL_EXCEPTIONS_RET(
+            oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha,
+                                      internal_A_handle->backend_handle, x_handle->get_usm_ptr<T>(),
+                                      y_handle->get_usm_ptr<T>(), dependencies));
     }
 }
 
-sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                 matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
+sycl::event spsv(sycl::queue& queue, transpose opA, const void* alpha, matrix_view A_view,
+                 matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                  const std::vector<sycl::event>& dependencies) {
     bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha);
@@ -174,8 +181,8 @@ sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
                      alg);
 
     if (!spsv_descr->optimized_called) {
-        throw mkl::uninitialized("sparse_blas", __func__,
-                                 "spsv_optimize must be called before spsv.");
+        throw math::uninitialized("sparse_blas", __func__,
+                                  "spsv_optimize must be called before spsv.");
     }
     CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize");
     CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize");
@@ -185,9 +192,9 @@ sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
     CHECK_DESCR_MATCH(spsv_descr, alg, "spsv_optimize");
 
     auto value_type = detail::get_internal_handle(A_handle)->get_value_type();
-    DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle,
-                           x_handle, y_handle, alg, spsv_descr, dependencies,
-                           is_alpha_host_accessible);
+    DISPATCH_ONEMATH_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view,
+                               A_handle, x_handle, y_handle, alg, spsv_descr, dependencies,
+                               is_alpha_host_accessible);
 }
 
-} // namespace oneapi::mkl::sparse::BACKEND
+} // namespace oneapi::math::sparse::BACKEND
diff --git a/src/sparse_blas/backends/mklcpu/CMakeLists.txt b/src/sparse_blas/backends/mklcpu/CMakeLists.txt
index e41cae268..973fdae20 100644
--- a/src/sparse_blas/backends/mklcpu/CMakeLists.txt
+++ b/src/sparse_blas/backends/mklcpu/CMakeLists.txt
@@ -17,51 +17,54 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_sparse_blas_mklcpu)
+set(LIB_NAME onemath_sparse_blas_mklcpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 include(WarningsUtils)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   mklcpu_handles.cpp
   mklcpu_operations.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mklcpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_sparse_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::SPARSE)
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_SYCL::SPARSE
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 else()
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_DPCPP
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as not transitive for shared (dynamic) library builds
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -74,8 +77,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp
index 0aaf91b25..f334040f6 100644
--- a/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp
+++ b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp
@@ -17,13 +17,15 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp"
+
+#include "common_onemkl_conversion.hpp"
 
 #include "sparse_blas/backends/mkl_common/mkl_dispatch.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_handles.hpp"
 
-namespace oneapi::mkl::sparse::mklcpu {
+namespace oneapi::math::sparse::mklcpu {
 
 #include "sparse_blas/backends/mkl_common/mkl_handles.cxx"
 
-} // namespace oneapi::mkl::sparse::mklcpu
+} // namespace oneapi::math::sparse::mklcpu
diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp
index ebc8ceecf..62f3e30c9 100644
--- a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp
+++ b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp
@@ -17,6 +17,7 @@
 *
 **************************************************************************/
 
+#include "common_onemkl_conversion.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_dispatch.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_handles.hpp"
 #include "sparse_blas/common_op_verification.hpp"
@@ -24,7 +25,7 @@
 #include "sparse_blas/matrix_view_comparison.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp"
 
 #define BACKEND mklcpu
 
diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp
index 1a6217684..ea5b7af32 100644
--- a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp
+++ b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp
@@ -17,16 +17,16 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklcpu/onemath_sparse_blas_mklcpu.hpp"
 
 #include "sparse_blas/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         mklcpu
 
-extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = {
+extern "C" sparse_blas_function_table_t onemath_sparse_blas_table = {
     WRAPPER_VERSION,
 #include "sparse_blas/backends/backend_wrappers.cxx"
 };
diff --git a/src/sparse_blas/backends/mklgpu/CMakeLists.txt b/src/sparse_blas/backends/mklgpu/CMakeLists.txt
index cd25babc2..84e6e097c 100644
--- a/src/sparse_blas/backends/mklgpu/CMakeLists.txt
+++ b/src/sparse_blas/backends/mklgpu/CMakeLists.txt
@@ -17,51 +17,54 @@
 # SPDX-License-Identifier: Apache-2.0
 #===============================================================================
 
-set(LIB_NAME onemkl_sparse_blas_mklgpu)
+set(LIB_NAME onemath_sparse_blas_mklgpu)
 set(LIB_OBJ ${LIB_NAME}_obj)
 
 include(WarningsUtils)
 
 add_library(${LIB_NAME})
+add_deprecated_library(${LIB_NAME})
 add_library(${LIB_OBJ} OBJECT
   mklgpu_handles.cpp
   mklgpu_operations.cpp
   $<$<BOOL:${BUILD_SHARED_LIBS}>: mklgpu_wrappers.cpp>
 )
-add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME})
+add_dependencies(onemath_backend_libs_sparse_blas ${LIB_NAME})
 
 target_include_directories(${LIB_OBJ}
   PRIVATE ${PROJECT_SOURCE_DIR}/include
           ${PROJECT_SOURCE_DIR}/src
+          ${PROJECT_SOURCE_DIR}/src/include
           ${CMAKE_BINARY_DIR}/bin
-          ${ONEMKL_GENERATED_INCLUDE_PATH}
+          ${ONEMATH_GENERATED_INCLUDE_PATH}
 )
 
-target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT})
+target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT})
 
 if(TARGET MKL::MKL_SYCL::SPARSE)
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_SYCL::SPARSE
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 else()
   target_link_libraries(${LIB_OBJ}
-    PUBLIC ONEMKL::SYCL::SYCL
+    PUBLIC ONEMATH::SYCL::SYCL
     PUBLIC MKL::MKL_DPCPP
-    PRIVATE onemkl_warnings
+    PRIVATE onemath_warnings
   )
 endif()
+target_add_intel_onemkl_include(${LIB_OBJ})
 
 set_target_properties(${LIB_OBJ} PROPERTIES
   POSITION_INDEPENDENT_CODE ON
 )
 target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})
 
-#Set oneMKL libraries as not transitive for dynamic
+# Set oneMath libraries as not transitive for shared (dynamic) library builds
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIB_NAME} PROPERTIES
-    INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
+    INTERFACE_LINK_LIBRARIES ONEMATH::SYCL::SYCL
   )
 endif()
 
@@ -74,8 +77,8 @@ set_target_properties(${LIB_NAME} PROPERTIES
 list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
 
 # Add the library to install package
-install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
-install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
   RUNTIME DESTINATION bin
   ARCHIVE DESTINATION lib
   LIBRARY DESTINATION lib
diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp
index 648fed66e..78ecea702 100644
--- a/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp
+++ b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp
@@ -17,13 +17,15 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp"
+
+#include "common_onemkl_conversion.hpp"
 
 #include "sparse_blas/backends/mkl_common/mkl_dispatch.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_handles.hpp"
 
-namespace oneapi::mkl::sparse::mklgpu {
+namespace oneapi::math::sparse::mklgpu {
 
 #include "sparse_blas/backends/mkl_common/mkl_handles.cxx"
 
-} // namespace oneapi::mkl::sparse::mklgpu
+} // namespace oneapi::math::sparse::mklgpu
diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp
index 1102306dc..36e2d20ba 100644
--- a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp
+++ b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp
@@ -17,6 +17,7 @@
 *
 **************************************************************************/
 
+#include "common_onemkl_conversion.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_dispatch.hpp"
 #include "sparse_blas/backends/mkl_common/mkl_handles.hpp"
 #include "sparse_blas/common_op_verification.hpp"
@@ -24,7 +25,7 @@
 #include "sparse_blas/matrix_view_comparison.hpp"
 #include "sparse_blas/sycl_helper.hpp"
 
-#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp"
 
 #define BACKEND mklgpu
 
diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp
index 4a261f64e..ed9cfd801 100644
--- a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp
+++ b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp
@@ -17,16 +17,16 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 
-#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp"
+#include "oneapi/math/sparse_blas/detail/mklgpu/onemath_sparse_blas_mklgpu.hpp"
 
 #include "sparse_blas/function_table.hpp"
 
 #define WRAPPER_VERSION 1
 #define BACKEND         mklgpu
 
-extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = {
+extern "C" sparse_blas_function_table_t onemath_sparse_blas_table = {
     WRAPPER_VERSION,
 #include "sparse_blas/backends/backend_wrappers.cxx"
 };
diff --git a/src/sparse_blas/common_op_verification.hpp b/src/sparse_blas/common_op_verification.hpp
index 318766fb4..2bf38f9e0 100644
--- a/src/sparse_blas/common_op_verification.hpp
+++ b/src/sparse_blas/common_op_verification.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
 
 #include <string>
 
@@ -28,17 +28,17 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 #include "macros.hpp"
 
-namespace oneapi::mkl::sparse::detail {
+namespace oneapi::math::sparse::detail {
 
 /// Throw an exception if the scalar is not accessible in the host
 inline void check_ptr_is_host_accessible(const std::string& function_name,
                                          const std::string& scalar_name,
                                          bool is_ptr_accessible_on_host) {
     if (!is_ptr_accessible_on_host) {
-        throw mkl::invalid_argument(
+        throw math::invalid_argument(
             "sparse_blas", function_name,
             "Scalar " + scalar_name + " must be accessible on the host for buffer functions.");
     }
@@ -59,28 +59,28 @@ void check_valid_spmm_common(const std::string& function_name, matrix_view A_vie
         check_ptr_is_host_accessible("spmm", "beta", is_beta_host_accessible);
     }
     if (is_alpha_host_accessible != is_beta_host_accessible) {
-        throw mkl::invalid_argument(
+        throw math::invalid_argument(
             "sparse_blas", function_name,
             "Alpha and beta must both be placed on host memory or device memory.");
     }
     if (B_handle->dense_layout != C_handle->dense_layout) {
-        throw mkl::invalid_argument("sparse_blas", function_name,
-                                    "B and C matrices must use the same layout.");
+        throw math::invalid_argument("sparse_blas", function_name,
+                                     "B and C matrices must use the same layout.");
     }
 
     if (A_view.type_view != matrix_descr::general) {
-        throw mkl::invalid_argument("sparse_blas", function_name,
-                                    "Matrix view's `type_view` must be `matrix_descr::general`.");
+        throw math::invalid_argument("sparse_blas", function_name,
+                                     "Matrix view's `type_view` must be `matrix_descr::general`.");
     }
 
-    if (A_view.diag_view != oneapi::mkl::diag::nonunit) {
-        throw mkl::invalid_argument("sparse_blas", function_name,
-                                    "Matrix's diag_view must be `nonunit`.");
+    if (A_view.diag_view != oneapi::math::diag::nonunit) {
+        throw math::invalid_argument("sparse_blas", function_name,
+                                     "Matrix's diag_view must be `nonunit`.");
     }
 }
 
 template <typename InternalSparseMatHandleT>
-void check_valid_spmv_common(const std::string& function_name, oneapi::mkl::transpose /*opA*/,
+void check_valid_spmv_common(const std::string& function_name, oneapi::math::transpose /*opA*/,
                              matrix_view A_view, InternalSparseMatHandleT internal_A_handle,
                              dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                              bool is_alpha_host_accessible, bool is_beta_host_accessible) {
@@ -94,18 +94,18 @@ void check_valid_spmv_common(const std::string& function_name, oneapi::mkl::tran
         check_ptr_is_host_accessible("spmv", "beta", is_beta_host_accessible);
     }
     if (is_alpha_host_accessible != is_beta_host_accessible) {
-        throw mkl::invalid_argument(
+        throw math::invalid_argument(
             "sparse_blas", function_name,
             "Alpha and beta must both be placed on host memory or device memory.");
     }
     if (A_view.type_view == matrix_descr::diagonal) {
-        throw mkl::invalid_argument("sparse_blas", function_name,
-                                    "Matrix view's `type_view` cannot be diagonal.");
+        throw math::invalid_argument("sparse_blas", function_name,
+                                     "Matrix view's `type_view` cannot be diagonal.");
     }
 
     if (A_view.type_view != matrix_descr::triangular &&
-        A_view.diag_view == oneapi::mkl::diag::unit) {
-        throw mkl::invalid_argument(
+        A_view.diag_view == oneapi::math::diag::unit) {
+        throw math::invalid_argument(
             "sparse_blas", function_name,
             "`diag_view::unit` can only be used with `type_view::triangular`.");
     }
@@ -122,7 +122,7 @@ void check_valid_spsv_common(const std::string& function_name, matrix_view A_vie
 
     check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle);
     if (A_view.type_view != matrix_descr::triangular) {
-        throw mkl::invalid_argument(
+        throw math::invalid_argument(
             "sparse_blas", function_name,
             "Matrix view's `type_view` must be `matrix_descr::triangular`.");
     }
@@ -132,6 +132,6 @@ void check_valid_spsv_common(const std::string& function_name, matrix_view A_vie
     }
 }
 
-} // namespace oneapi::mkl::sparse::detail
+} // namespace oneapi::math::sparse::detail
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
\ No newline at end of file
+#endif // _ONEMATH_SRC_SPARSE_BLAS_COMMON_OP_VERIFICATION_HPP_
\ No newline at end of file
diff --git a/src/sparse_blas/enum_data_types.hpp b/src/sparse_blas/enum_data_types.hpp
index 26946facb..7c7a2c5e3 100644
--- a/src/sparse_blas/enum_data_types.hpp
+++ b/src/sparse_blas/enum_data_types.hpp
@@ -17,12 +17,12 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
 
 #include <string>
 
-namespace oneapi::mkl::sparse::detail {
+namespace oneapi::math::sparse::detail {
 
 enum data_type { none, int32, int64, real_fp32, real_fp64, complex_fp32, complex_fp64 };
 
@@ -64,6 +64,6 @@ constexpr data_type get_data_type() {
     }
 }
 
-} // namespace oneapi::mkl::sparse::detail
+} // namespace oneapi::math::sparse::detail
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_
diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp
index 429468ca1..6139a6381 100644
--- a/src/sparse_blas/function_table.hpp
+++ b/src/sparse_blas/function_table.hpp
@@ -17,87 +17,87 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_
-#define _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_FUNCTION_TABLE_HPP_
+#define _ONEMATH_SPARSE_BLAS_FUNCTION_TABLE_HPP_
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 #include "sparse_blas/macros.hpp"
 
 // Dense vector
-#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX)                                 \
-    void (*init_dense_vector_buffer##FP_SUFFIX)(                                      \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \
-        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                             \
-    void (*init_dense_vector_usm##FP_SUFFIX)(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \
-        std::int64_t size, FP_TYPE* val);                                             \
-    void (*set_dense_vector_data_buffer##FP_SUFFIX)(                                  \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle,     \
-        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                             \
-    void (*set_dense_vector_data_usm##FP_SUFFIX)(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle,     \
+#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX)                                  \
+    void (*init_dense_vector_buffer##FP_SUFFIX)(                                       \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \
+        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                              \
+    void (*init_dense_vector_usm##FP_SUFFIX)(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \
+        std::int64_t size, FP_TYPE* val);                                              \
+    void (*set_dense_vector_data_buffer##FP_SUFFIX)(                                   \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle,     \
+        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                              \
+    void (*set_dense_vector_data_usm##FP_SUFFIX)(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle,     \
         std::int64_t size, FP_TYPE* val)
 
 // Dense matrix
-#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX)                                 \
-    void (*init_dense_matrix_buffer##FP_SUFFIX)(                                      \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
-    void (*init_dense_matrix_usm##FP_SUFFIX)(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, FP_TYPE* val);                              \
-    void (*set_dense_matrix_data_buffer##FP_SUFFIX)(                                  \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle,     \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
-    void (*set_dense_matrix_data_usm##FP_SUFFIX)(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle,     \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, FP_TYPE* val)
+#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX)                                  \
+    void (*init_dense_matrix_buffer##FP_SUFFIX)(                                       \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
+    void (*init_dense_matrix_usm##FP_SUFFIX)(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, FP_TYPE* val);                              \
+    void (*set_dense_matrix_data_buffer##FP_SUFFIX)(                                   \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle,     \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
+    void (*set_dense_matrix_data_usm##FP_SUFFIX)(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle,     \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, FP_TYPE* val)
 
 // COO matrix
-#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                          \
-    void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                          \
-        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                          \
-    void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)(                                            \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val);        \
-    void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,                    \
-        sycl::buffer<INT_TYPE, 1> row_ind, sycl::buffer<INT_TYPE, 1> col_ind,                      \
-        sycl::buffer<FP_TYPE, 1> val);                                                             \
-    void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)(                                        \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ind, \
-        INT_TYPE* col_ind, FP_TYPE* val)
+#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                    \
+    void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)(                                   \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)(                                      \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \
+    void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)(                               \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)(                                  \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val)
 
 // CSR matrix
-#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                          \
-    void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                          \
-        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                          \
-    void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)(                                            \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val);        \
-    void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,                    \
-        sycl::buffer<INT_TYPE, 1> row_ptr, sycl::buffer<INT_TYPE, 1> col_ind,                      \
-        sycl::buffer<FP_TYPE, 1> val);                                                             \
-    void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)(                                        \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ptr, \
-        INT_TYPE* col_ind, FP_TYPE* val)
+#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                    \
+    void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)(                                   \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)(                                      \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \
+    void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)(                               \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)(                                  \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val)
 
 typedef struct {
     int version;
@@ -105,13 +105,13 @@ typedef struct {
     // Dense vector
     FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS);
     sycl::event (*release_dense_vector)(sycl::queue& queue,
-                                        oneapi::mkl::sparse::dense_vector_handle_t dvhandle,
+                                        oneapi::math::sparse::dense_vector_handle_t dvhandle,
                                         const std::vector<sycl::event>& dependencies);
 
     // Dense matrix
     FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS);
     sycl::event (*release_dense_matrix)(sycl::queue& queue,
-                                        oneapi::mkl::sparse::dense_matrix_handle_t dmhandle,
+                                        oneapi::math::sparse::dense_matrix_handle_t dmhandle,
                                         const std::vector<sycl::event>& dependencies);
 
     // COO matrix
@@ -122,140 +122,141 @@ typedef struct {
 
     // Common sparse matrix functions
     sycl::event (*release_sparse_matrix)(sycl::queue& queue,
-                                         oneapi::mkl::sparse::matrix_handle_t smhandle,
+                                         oneapi::math::sparse::matrix_handle_t smhandle,
                                          const std::vector<sycl::event>& dependencies);
 
-    bool (*set_matrix_property)(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle,
-                                oneapi::mkl::sparse::matrix_property property);
+    bool (*set_matrix_property)(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle,
+                                oneapi::math::sparse::matrix_property property);
 
     // SPMM
-    void (*init_spmm_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmm_descr_t* p_spmm_descr);
+    void (*init_spmm_descr)(sycl::queue& queue, oneapi::math::sparse::spmm_descr_t* p_spmm_descr);
 
     sycl::event (*release_spmm_descr)(sycl::queue& queue,
-                                      oneapi::mkl::sparse::spmm_descr_t spmm_descr,
+                                      oneapi::math::sparse::spmm_descr_t spmm_descr,
                                       const std::vector<sycl::event>& dependencies);
 
-    void (*spmm_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA,
-                             oneapi::mkl::transpose opB, const void* alpha,
-                             oneapi::mkl::sparse::matrix_view A_view,
-                             oneapi::mkl::sparse::matrix_handle_t A_handle,
-                             oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta,
-                             oneapi::mkl::sparse::dense_matrix_handle_t C_handle,
-                             oneapi::mkl::sparse::spmm_alg alg,
-                             oneapi::mkl::sparse::spmm_descr_t spmm_descr,
+    void (*spmm_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA,
+                             oneapi::math::transpose opB, const void* alpha,
+                             oneapi::math::sparse::matrix_view A_view,
+                             oneapi::math::sparse::matrix_handle_t A_handle,
+                             oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta,
+                             oneapi::math::sparse::dense_matrix_handle_t C_handle,
+                             oneapi::math::sparse::spmm_alg alg,
+                             oneapi::math::sparse::spmm_descr_t spmm_descr,
                              std::size_t& temp_buffer_size);
 
     void (*spmm_optimize_buffer)(
-        sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-        const void* alpha, oneapi::mkl::sparse::matrix_view A_view,
-        oneapi::mkl::sparse::matrix_handle_t A_handle,
-        oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta,
-        oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg,
-        oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace);
-
-    sycl::event (*spmm_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                     oneapi::mkl::transpose opB, const void* alpha,
-                                     oneapi::mkl::sparse::matrix_view A_view,
-                                     oneapi::mkl::sparse::matrix_handle_t A_handle,
-                                     oneapi::mkl::sparse::dense_matrix_handle_t B_handle,
+        sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
+        const void* alpha, oneapi::math::sparse::matrix_view A_view,
+        oneapi::math::sparse::matrix_handle_t A_handle,
+        oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta,
+        oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg,
+        oneapi::math::sparse::spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace);
+
+    sycl::event (*spmm_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA,
+                                     oneapi::math::transpose opB, const void* alpha,
+                                     oneapi::math::sparse::matrix_view A_view,
+                                     oneapi::math::sparse::matrix_handle_t A_handle,
+                                     oneapi::math::sparse::dense_matrix_handle_t B_handle,
                                      const void* beta,
-                                     oneapi::mkl::sparse::dense_matrix_handle_t C_handle,
-                                     oneapi::mkl::sparse::spmm_alg alg,
-                                     oneapi::mkl::sparse::spmm_descr_t spmm_descr, void* workspace,
+                                     oneapi::math::sparse::dense_matrix_handle_t C_handle,
+                                     oneapi::math::sparse::spmm_alg alg,
+                                     oneapi::math::sparse::spmm_descr_t spmm_descr, void* workspace,
                                      const std::vector<sycl::event>& dependencies);
 
-    sycl::event (*spmm)(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
-                        const void* alpha, oneapi::mkl::sparse::matrix_view A_view,
-                        oneapi::mkl::sparse::matrix_handle_t A_handle,
-                        oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta,
-                        oneapi::mkl::sparse::dense_matrix_handle_t C_handle,
-                        oneapi::mkl::sparse::spmm_alg alg,
-                        oneapi::mkl::sparse::spmm_descr_t spmm_descr,
+    sycl::event (*spmm)(sycl::queue& queue, oneapi::math::transpose opA,
+                        oneapi::math::transpose opB, const void* alpha,
+                        oneapi::math::sparse::matrix_view A_view,
+                        oneapi::math::sparse::matrix_handle_t A_handle,
+                        oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta,
+                        oneapi::math::sparse::dense_matrix_handle_t C_handle,
+                        oneapi::math::sparse::spmm_alg alg,
+                        oneapi::math::sparse::spmm_descr_t spmm_descr,
                         const std::vector<sycl::event>& dependencies);
 
     // SPMV
-    void (*init_spmv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmv_descr_t* p_spmv_descr);
+    void (*init_spmv_descr)(sycl::queue& queue, oneapi::math::sparse::spmv_descr_t* p_spmv_descr);
 
     sycl::event (*release_spmv_descr)(sycl::queue& queue,
-                                      oneapi::mkl::sparse::spmv_descr_t spmv_descr,
+                                      oneapi::math::sparse::spmv_descr_t spmv_descr,
                                       const std::vector<sycl::event>& dependencies);
 
-    void (*spmv_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                             oneapi::mkl::sparse::matrix_view A_view,
-                             oneapi::mkl::sparse::matrix_handle_t A_handle,
-                             oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta,
-                             oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                             oneapi::mkl::sparse::spmv_alg alg,
-                             oneapi::mkl::sparse::spmv_descr_t spmv_descr,
+    void (*spmv_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                             oneapi::math::sparse::matrix_view A_view,
+                             oneapi::math::sparse::matrix_handle_t A_handle,
+                             oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta,
+                             oneapi::math::sparse::dense_vector_handle_t y_handle,
+                             oneapi::math::sparse::spmv_alg alg,
+                             oneapi::math::sparse::spmv_descr_t spmv_descr,
                              std::size_t& temp_buffer_size);
 
     void (*spmv_optimize_buffer)(
-        sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-        oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle,
-        oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta,
-        oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg,
-        oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace);
-
-    sycl::event (*spmv_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                     const void* alpha, oneapi::mkl::sparse::matrix_view A_view,
-                                     oneapi::mkl::sparse::matrix_handle_t A_handle,
-                                     oneapi::mkl::sparse::dense_vector_handle_t x_handle,
+        sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+        oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle,
+        oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta,
+        oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg,
+        oneapi::math::sparse::spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace);
+
+    sycl::event (*spmv_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA,
+                                     const void* alpha, oneapi::math::sparse::matrix_view A_view,
+                                     oneapi::math::sparse::matrix_handle_t A_handle,
+                                     oneapi::math::sparse::dense_vector_handle_t x_handle,
                                      const void* beta,
-                                     oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                                     oneapi::mkl::sparse::spmv_alg alg,
-                                     oneapi::mkl::sparse::spmv_descr_t spmv_descr, void* workspace,
+                                     oneapi::math::sparse::dense_vector_handle_t y_handle,
+                                     oneapi::math::sparse::spmv_alg alg,
+                                     oneapi::math::sparse::spmv_descr_t spmv_descr, void* workspace,
                                      const std::vector<sycl::event>& dependencies);
 
-    sycl::event (*spmv)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                        oneapi::mkl::sparse::matrix_view A_view,
-                        oneapi::mkl::sparse::matrix_handle_t A_handle,
-                        oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta,
-                        oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                        oneapi::mkl::sparse::spmv_alg alg,
-                        oneapi::mkl::sparse::spmv_descr_t spmv_descr,
+    sycl::event (*spmv)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                        oneapi::math::sparse::matrix_view A_view,
+                        oneapi::math::sparse::matrix_handle_t A_handle,
+                        oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta,
+                        oneapi::math::sparse::dense_vector_handle_t y_handle,
+                        oneapi::math::sparse::spmv_alg alg,
+                        oneapi::math::sparse::spmv_descr_t spmv_descr,
                         const std::vector<sycl::event>& dependencies);
 
     // SPSV
-    void (*init_spsv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spsv_descr_t* p_spsv_descr);
+    void (*init_spsv_descr)(sycl::queue& queue, oneapi::math::sparse::spsv_descr_t* p_spsv_descr);
 
     sycl::event (*release_spsv_descr)(sycl::queue& queue,
-                                      oneapi::mkl::sparse::spsv_descr_t spsv_descr,
+                                      oneapi::math::sparse::spsv_descr_t spsv_descr,
                                       const std::vector<sycl::event>& dependencies);
 
-    void (*spsv_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                             oneapi::mkl::sparse::matrix_view A_view,
-                             oneapi::mkl::sparse::matrix_handle_t A_handle,
-                             oneapi::mkl::sparse::dense_vector_handle_t x_handle,
-                             oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                             oneapi::mkl::sparse::spsv_alg alg,
-                             oneapi::mkl::sparse::spsv_descr_t spsv_descr,
+    void (*spsv_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                             oneapi::math::sparse::matrix_view A_view,
+                             oneapi::math::sparse::matrix_handle_t A_handle,
+                             oneapi::math::sparse::dense_vector_handle_t x_handle,
+                             oneapi::math::sparse::dense_vector_handle_t y_handle,
+                             oneapi::math::sparse::spsv_alg alg,
+                             oneapi::math::sparse::spsv_descr_t spsv_descr,
                              std::size_t& temp_buffer_size);
 
-    void (*spsv_optimize_buffer)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                                 oneapi::mkl::sparse::matrix_view A_view,
-                                 oneapi::mkl::sparse::matrix_handle_t A_handle,
-                                 oneapi::mkl::sparse::dense_vector_handle_t x_handle,
-                                 oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                                 oneapi::mkl::sparse::spsv_alg alg,
-                                 oneapi::mkl::sparse::spsv_descr_t spsv_descr,
+    void (*spsv_optimize_buffer)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                                 oneapi::math::sparse::matrix_view A_view,
+                                 oneapi::math::sparse::matrix_handle_t A_handle,
+                                 oneapi::math::sparse::dense_vector_handle_t x_handle,
+                                 oneapi::math::sparse::dense_vector_handle_t y_handle,
+                                 oneapi::math::sparse::spsv_alg alg,
+                                 oneapi::math::sparse::spsv_descr_t spsv_descr,
                                  sycl::buffer<std::uint8_t, 1> workspace);
 
-    sycl::event (*spsv_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA,
-                                     const void* alpha, oneapi::mkl::sparse::matrix_view A_view,
-                                     oneapi::mkl::sparse::matrix_handle_t A_handle,
-                                     oneapi::mkl::sparse::dense_vector_handle_t x_handle,
-                                     oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                                     oneapi::mkl::sparse::spsv_alg alg,
-                                     oneapi::mkl::sparse::spsv_descr_t spsv_descr, void* workspace,
+    sycl::event (*spsv_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA,
+                                     const void* alpha, oneapi::math::sparse::matrix_view A_view,
+                                     oneapi::math::sparse::matrix_handle_t A_handle,
+                                     oneapi::math::sparse::dense_vector_handle_t x_handle,
+                                     oneapi::math::sparse::dense_vector_handle_t y_handle,
+                                     oneapi::math::sparse::spsv_alg alg,
+                                     oneapi::math::sparse::spsv_descr_t spsv_descr, void* workspace,
                                      const std::vector<sycl::event>& dependencies);
 
-    sycl::event (*spsv)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
-                        oneapi::mkl::sparse::matrix_view A_view,
-                        oneapi::mkl::sparse::matrix_handle_t A_handle,
-                        oneapi::mkl::sparse::dense_vector_handle_t x_handle,
-                        oneapi::mkl::sparse::dense_vector_handle_t y_handle,
-                        oneapi::mkl::sparse::spsv_alg alg,
-                        oneapi::mkl::sparse::spsv_descr_t spsv_descr,
+    sycl::event (*spsv)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
+                        oneapi::math::sparse::matrix_view A_view,
+                        oneapi::math::sparse::matrix_handle_t A_handle,
+                        oneapi::math::sparse::dense_vector_handle_t x_handle,
+                        oneapi::math::sparse::dense_vector_handle_t y_handle,
+                        oneapi::math::sparse::spsv_alg alg,
+                        oneapi::math::sparse::spsv_descr_t spsv_descr,
                         const std::vector<sycl::event>& dependencies);
 } sparse_blas_function_table_t;
 
@@ -264,4 +265,4 @@ typedef struct {
 #undef DEFINE_COO_MATRIX_FUNCS
 #undef DEFINE_CSR_MATRIX_FUNCS
 
-#endif // _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_FUNCTION_TABLE_HPP_
diff --git a/src/sparse_blas/generic_container.hpp b/src/sparse_blas/generic_container.hpp
index c2e8476a7..d99c2a3de 100644
--- a/src/sparse_blas/generic_container.hpp
+++ b/src/sparse_blas/generic_container.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
 
 #include <memory>
 #include <string>
@@ -29,10 +29,11 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/sparse_blas/types.hpp"
+#include "oneapi/math/exceptions.hpp"
+#include "oneapi/math/sparse_blas/types.hpp"
 #include "enum_data_types.hpp"
 
-namespace oneapi::mkl::sparse::detail {
+namespace oneapi::math::sparse::detail {
 
 /// Represent a non-templated container for USM or buffer.
 struct generic_container {
@@ -162,7 +163,7 @@ struct generic_dense_vector_handle : public detail::generic_dense_handle<Backend
             : generic_dense_handle<BackendHandleT>(backend_handle, value_buffer),
               size(size) {
         if (value_buffer.size() < static_cast<std::size_t>(size)) {
-            throw oneapi::mkl::invalid_argument(
+            throw oneapi::math::invalid_argument(
                 "sparse_blas", "init_dense_vector",
                 "Buffer size too small, expected at least " + std::to_string(size) + " but got " +
                     std::to_string(value_buffer.size()) + " elements.");
@@ -176,7 +177,7 @@ struct generic_dense_matrix_handle : public detail::generic_dense_handle<Backend
     std::int64_t num_rows;
     std::int64_t num_cols;
     std::int64_t ld;
-    oneapi::mkl::layout dense_layout;
+    oneapi::math::layout dense_layout;
 
     template <typename T>
     generic_dense_matrix_handle(BackendHandleT backend_handle, T* value_ptr, std::int64_t num_rows,
@@ -197,9 +198,9 @@ struct generic_dense_matrix_handle : public detail::generic_dense_handle<Backend
               ld(ld),
               dense_layout(dense_layout) {
         std::size_t minimum_size = static_cast<std::size_t>(
-            (dense_layout == oneapi::mkl::layout::row_major ? num_rows : num_cols) * ld);
+            (dense_layout == oneapi::math::layout::row_major ? num_rows : num_cols) * ld);
         if (value_buffer.size() < minimum_size) {
-            throw oneapi::mkl::invalid_argument(
+            throw oneapi::math::invalid_argument(
                 "sparse_blas", "init_dense_matrix",
                 "Buffer size too small, expected at least " + std::to_string(minimum_size) +
                     " but got " + std::to_string(value_buffer.size()) + " elements.");
@@ -222,14 +223,14 @@ struct generic_sparse_handle {
     std::int64_t num_rows;
     std::int64_t num_cols;
     std::int64_t nnz;
-    oneapi::mkl::index_base index;
+    index_base index;
     std::int32_t properties_mask;
     bool can_be_reset;
 
     template <typename fpType, typename intType>
     generic_sparse_handle(BackendHandleT backend_handle, intType* row_ptr, intType* col_ptr,
                           fpType* value_ptr, sparse_format format, std::int64_t num_rows,
-                          std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index)
+                          std::int64_t num_cols, std::int64_t nnz, index_base index)
             : backend_handle(backend_handle),
               row_container(generic_container(row_ptr)),
               col_container(generic_container(col_ptr)),
@@ -247,7 +248,7 @@ struct generic_sparse_handle {
                           const sycl::buffer<intType, 1> col_buffer,
                           const sycl::buffer<fpType, 1> value_buffer, sparse_format format,
                           std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,
-                          oneapi::mkl::index_base index)
+                          index_base index)
             : backend_handle(backend_handle),
               row_container(row_buffer),
               col_container(col_buffer),
@@ -288,7 +289,7 @@ struct generic_sparse_handle {
             case matrix_property::sorted: return 1 << 1;
             case matrix_property::sorted_by_rows: return 1 << 2;
             default:
-                throw oneapi::mkl::invalid_argument(
+                throw oneapi::math::invalid_argument(
                     "sparse_blas", "set_matrix_property",
                     "Unsupported matrix property " + std::to_string(static_cast<int>(property)));
         }
@@ -296,7 +297,7 @@ struct generic_sparse_handle {
 };
 
 inline void throw_incompatible_container(const std::string& function_name) {
-    throw oneapi::mkl::invalid_argument(
+    throw oneapi::math::invalid_argument(
         "sparse_blas", function_name,
         "Incompatible container types. All inputs and outputs must use the same container: buffer or USM");
 }
@@ -332,17 +333,17 @@ void check_all_containers_compatible(const std::string& function_name,
     for (const auto internal_container : { internal_containers... }) {
         const data_type other_value_type = internal_container->get_value_type();
         if (other_value_type != first_value_type) {
-            throw oneapi::mkl::invalid_argument(
+            throw oneapi::math::invalid_argument(
                 "sparse_blas", function_name,
                 "Incompatible data types expected " + data_type_to_str(first_value_type) +
                     " but got " + data_type_to_str(other_value_type));
         }
         const data_type other_int_type = internal_container->get_int_type();
         if (other_int_type != data_type::none && other_int_type != first_int_type) {
-            throw oneapi::mkl::invalid_argument("sparse_blas", function_name,
-                                                "Incompatible integer types expected " +
-                                                    data_type_to_str(first_int_type) + " but got " +
-                                                    data_type_to_str(other_int_type));
+            throw oneapi::math::invalid_argument(
+                "sparse_blas", function_name,
+                "Incompatible integer types expected " + data_type_to_str(first_int_type) +
+                    " but got " + data_type_to_str(other_int_type));
         }
     }
 }
@@ -351,14 +352,14 @@ template <typename fpType, typename InternalHandleT>
 void check_can_reset_value_handle(const std::string& function_name,
                                   InternalHandleT* internal_handle, bool expect_buffer) {
     if (internal_handle->get_value_type() != detail::get_data_type<fpType>()) {
-        throw oneapi::mkl::invalid_argument(
+        throw oneapi::math::invalid_argument(
             "sparse_blas", function_name,
             "Incompatible data types expected " +
                 data_type_to_str(internal_handle->get_value_type()) + " but got " +
                 data_type_to_str(detail::get_data_type<fpType>()));
     }
     if (internal_handle->all_use_buffer() != expect_buffer) {
-        throw oneapi::mkl::invalid_argument(
+        throw oneapi::math::invalid_argument(
             "sparse_blas", function_name, "Cannot change the container type between buffer or USM");
     }
 }
@@ -368,19 +369,19 @@ void check_can_reset_sparse_handle(const std::string& function_name,
                                    InternalHandleT* internal_smhandle, bool expect_buffer) {
     check_can_reset_value_handle<fpType>(function_name, internal_smhandle, expect_buffer);
     if (internal_smhandle->get_int_type() != detail::get_data_type<intType>()) {
-        throw oneapi::mkl::invalid_argument(
+        throw oneapi::math::invalid_argument(
             "sparse_blas", function_name,
             "Incompatible data types expected " +
                 data_type_to_str(internal_smhandle->get_int_type()) + " but got " +
                 data_type_to_str(detail::get_data_type<intType>()));
     }
     if (!internal_smhandle->can_be_reset) {
-        throw mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "sparse_blas", function_name,
             "The backend does not support reseting the matrix handle's data after it was used in a computation.");
     }
 }
 
-} // namespace oneapi::mkl::sparse::detail
+} // namespace oneapi::math::sparse::detail
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_
diff --git a/src/sparse_blas/macros.hpp b/src/sparse_blas/macros.hpp
index 72aa39a75..2de06b720 100644
--- a/src/sparse_blas/macros.hpp
+++ b/src/sparse_blas/macros.hpp
@@ -17,8 +17,8 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef _ONEMKL_SPARSE_BLAS_MACROS_HPP_
-#define _ONEMKL_SPARSE_BLAS_MACROS_HPP_
+#ifndef _ONEMATH_SPARSE_BLAS_MACROS_HPP_
+#define _ONEMATH_SPARSE_BLAS_MACROS_HPP_
 
 #define FOR_EACH_FP_TYPE(DEFINE_MACRO)      \
     DEFINE_MACRO(float, _rf);               \
@@ -36,91 +36,91 @@
     FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \
     FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64)
 
-#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX)                            \
-    template void init_dense_vector<FP_TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \
-        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                             \
-    template void init_dense_vector<FP_TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \
-        std::int64_t size, FP_TYPE* val);                                             \
-    template void set_dense_vector_data<FP_TYPE>(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle,     \
-        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                             \
-    template void set_dense_vector_data<FP_TYPE>(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle,     \
+#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX)                             \
+    template void init_dense_vector<FP_TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \
+        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                              \
+    template void init_dense_vector<FP_TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \
+        std::int64_t size, FP_TYPE* val);                                              \
+    template void set_dense_vector_data<FP_TYPE>(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle,     \
+        std::int64_t size, sycl::buffer<FP_TYPE, 1> val);                              \
+    template void set_dense_vector_data<FP_TYPE>(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle,     \
         std::int64_t size, FP_TYPE* val)
 
-#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX)                            \
-    template void init_dense_matrix<FP_TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
-    template void init_dense_matrix<FP_TYPE>(                                         \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, FP_TYPE* val);                              \
-    template void set_dense_matrix_data<FP_TYPE>(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle,     \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
-    template void set_dense_matrix_data<FP_TYPE>(                                     \
-        sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle,     \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                \
-        oneapi::mkl::layout dense_layout, FP_TYPE* val)
+#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX)                             \
+    template void init_dense_matrix<FP_TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
+    template void init_dense_matrix<FP_TYPE>(                                          \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, FP_TYPE* val);                              \
+    template void set_dense_matrix_data<FP_TYPE>(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle,     \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, sycl::buffer<FP_TYPE, 1> val);              \
+    template void set_dense_matrix_data<FP_TYPE>(                                      \
+        sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle,     \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld,                 \
+        oneapi::math::layout dense_layout, FP_TYPE* val)
 
-#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                     \
-    template void init_coo_matrix<FP_TYPE, INT_TYPE>(                                              \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                          \
-        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                          \
-    template void init_coo_matrix<FP_TYPE, INT_TYPE>(                                              \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val);        \
-    template void set_coo_matrix_data<FP_TYPE, INT_TYPE>(                                          \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,                    \
-        sycl::buffer<INT_TYPE, 1> row_ind, sycl::buffer<INT_TYPE, 1> col_ind,                      \
-        sycl::buffer<FP_TYPE, 1> val);                                                             \
-    template void set_coo_matrix_data<FP_TYPE, INT_TYPE>(                                          \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ind, \
-        INT_TYPE* col_ind, FP_TYPE* val)
+#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)               \
+    template void init_coo_matrix<FP_TYPE, INT_TYPE>(                                        \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    template void init_coo_matrix<FP_TYPE, INT_TYPE>(                                        \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \
+    template void set_coo_matrix_data<FP_TYPE, INT_TYPE>(                                    \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ind,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    template void set_coo_matrix_data<FP_TYPE, INT_TYPE>(                                    \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val)
 
-#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)                     \
-    template void init_csr_matrix<FP_TYPE, INT_TYPE>(                                              \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                          \
-        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                          \
-    template void init_csr_matrix<FP_TYPE, INT_TYPE>(                                              \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle,                    \
-        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                            \
-        oneapi::mkl::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val);        \
-    template void set_csr_matrix_data<FP_TYPE, INT_TYPE>(                                          \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,                    \
-        sycl::buffer<INT_TYPE, 1> row_ptr, sycl::buffer<INT_TYPE, 1> col_ind,                      \
-        sycl::buffer<FP_TYPE, 1> val);                                                             \
-    template void set_csr_matrix_data<FP_TYPE, INT_TYPE>(                                          \
-        sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \
-        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ptr, \
-        INT_TYPE* col_ind, FP_TYPE* val)
+#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX)               \
+    template void init_csr_matrix<FP_TYPE, INT_TYPE>(                                        \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    template void init_csr_matrix<FP_TYPE, INT_TYPE>(                                        \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle,             \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \
+    template void set_csr_matrix_data<FP_TYPE, INT_TYPE>(                                    \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, sycl::buffer<INT_TYPE, 1> row_ptr,                   \
+        sycl::buffer<INT_TYPE, 1> col_ind, sycl::buffer<FP_TYPE, 1> val);                    \
+    template void set_csr_matrix_data<FP_TYPE, INT_TYPE>(                                    \
+        sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle,                 \
+        std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz,                      \
+        oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val)
 
-#define THROW_IF_NULLPTR(FUNC_NAME, PTR)                                       \
-    if (!(PTR)) {                                                              \
-        throw mkl::uninitialized("sparse_blas", FUNC_NAME,                     \
-                                 std::string(#PTR) + " must not be nullptr."); \
+#define THROW_IF_NULLPTR(FUNC_NAME, PTR)                                        \
+    if (!(PTR)) {                                                               \
+        throw math::uninitialized("sparse_blas", FUNC_NAME,                     \
+                                  std::string(#PTR) + " must not be nullptr."); \
     }
 
 #define CHECK_DESCR_MATCH(descr, argument, optimize_func_name)                                    \
     do {                                                                                          \
         if (descr->last_optimized_##argument != argument) {                                       \
-            throw mkl::invalid_argument(                                                          \
+            throw math::invalid_argument(                                                         \
                 "sparse_blas", __func__,                                                          \
                 #argument " argument must match with the previous call to " #optimize_func_name); \
         }                                                                                         \
     } while (0)
 
-#endif // _ONEMKL_SPARSE_BLAS_MACROS_HPP_
+#endif // _ONEMATH_SPARSE_BLAS_MACROS_HPP_
diff --git a/src/sparse_blas/matrix_view_comparison.hpp b/src/sparse_blas/matrix_view_comparison.hpp
index e01be7311..08c2f8c54 100644
--- a/src/sparse_blas/matrix_view_comparison.hpp
+++ b/src/sparse_blas/matrix_view_comparison.hpp
@@ -17,20 +17,20 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
 
-#include "oneapi/mkl/sparse_blas/matrix_view.hpp"
+#include "oneapi/math/sparse_blas/matrix_view.hpp"
 
-inline bool operator==(const oneapi::mkl::sparse::matrix_view& lhs,
-                       const oneapi::mkl::sparse::matrix_view& rhs) {
+inline bool operator==(const oneapi::math::sparse::matrix_view& lhs,
+                       const oneapi::math::sparse::matrix_view& rhs) {
     return lhs.type_view == rhs.type_view && lhs.uplo_view == rhs.uplo_view &&
            lhs.diag_view == rhs.diag_view;
 }
 
-inline bool operator!=(const oneapi::mkl::sparse::matrix_view& lhs,
-                       const oneapi::mkl::sparse::matrix_view& rhs) {
+inline bool operator!=(const oneapi::math::sparse::matrix_view& lhs,
+                       const oneapi::math::sparse::matrix_view& rhs) {
     return !(lhs == rhs);
 }
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
\ No newline at end of file
+#endif // _ONEMATH_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_
\ No newline at end of file
diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp
index f236b4626..2053dffcb 100644
--- a/src/sparse_blas/sparse_blas_loader.cpp
+++ b/src/sparse_blas/sparse_blas_loader.cpp
@@ -17,17 +17,17 @@
 *
 **************************************************************************/
 
-#include "oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp"
+#include "oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp"
 
 #include "function_table_initializer.hpp"
 #include "sparse_blas/function_table.hpp"
 #include "sparse_blas/macros.hpp"
-#include "oneapi/mkl/detail/get_device_id.hpp"
+#include "oneapi/math/detail/get_device_id.hpp"
 
-namespace oneapi::mkl::sparse {
+namespace oneapi::math::sparse {
 
-static oneapi::mkl::detail::table_initializer<mkl::domain::sparse_blas,
-                                              sparse_blas_function_table_t>
+static oneapi::math::detail::table_initializer<math::domain::sparse_blas,
+                                               sparse_blas_function_table_t>
     function_tables;
 
 // Dense vector
@@ -214,7 +214,7 @@ sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr,
     return function_tables[{ libkey, queue }].release_spmm_descr(queue, spmm_descr, dependencies);
 }
 
-void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                       const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                       dense_matrix_handle_t B_handle, const void* beta,
                       dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr,
@@ -225,7 +225,7 @@ void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mk
                                                         temp_buffer_size);
 }
 
-void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                    const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                    dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
                    spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer<std::uint8_t, 1> workspace) {
@@ -235,8 +235,8 @@ void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::
                                                             spmm_descr, workspace);
 }
 
-sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
-                          oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view,
+sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA,
+                          oneapi::math::transpose opB, const void* alpha, matrix_view A_view,
                           matrix_handle_t A_handle, dense_matrix_handle_t B_handle,
                           const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg,
                           spmm_descr_t spmm_descr, void* workspace,
@@ -247,7 +247,7 @@ sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA,
         workspace, dependencies);
 }
 
-sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB,
                  const void* alpha, matrix_view A_view, matrix_handle_t A_handle,
                  dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle,
                  spmm_alg alg, spmm_descr_t spmm_descr,
@@ -270,7 +270,7 @@ sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr,
     return function_tables[{ libkey, queue }].release_spmv_descr(queue, spmv_descr, dependencies);
 }
 
-void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                       matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                       spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) {
@@ -280,7 +280,7 @@ void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
                                                         temp_buffer_size);
 }
 
-void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                    matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                    spmv_descr_t spmv_descr, sycl::buffer<std::uint8_t, 1> workspace) {
@@ -289,7 +289,7 @@ void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
         queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace);
 }
 
-sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                           matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, const void* beta,
                           dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr,
@@ -300,7 +300,7 @@ sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
         dependencies);
 }
 
-sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                  matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  const void* beta, dense_vector_handle_t y_handle, spmv_alg alg,
                  spmv_descr_t spmv_descr, const std::vector<sycl::event>& dependencies) {
@@ -321,7 +321,7 @@ sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr,
     return function_tables[{ libkey, queue }].release_spsv_descr(queue, spsv_descr, dependencies);
 }
 
-void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                       matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                       dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                       std::size_t& temp_buffer_size) {
@@ -330,7 +330,7 @@ void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void
         queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, temp_buffer_size);
 }
 
-void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                    matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                    dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                    sycl::buffer<std::uint8_t, 1> workspace) {
@@ -339,7 +339,7 @@ void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* a
         queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace);
 }
 
-sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                           matrix_view A_view, matrix_handle_t A_handle,
                           dense_vector_handle_t x_handle, dense_vector_handle_t y_handle,
                           spsv_alg alg, spsv_descr_t spsv_descr, void* workspace,
@@ -350,7 +350,7 @@ sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const
                                                                 workspace, dependencies);
 }
 
-sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha,
+sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha,
                  matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle,
                  dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr,
                  const std::vector<sycl::event>& dependencies) {
@@ -359,4 +359,4 @@ sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alp
                                                    y_handle, alg, spsv_descr, dependencies);
 }
 
-} // namespace oneapi::mkl::sparse
+} // namespace oneapi::math::sparse
diff --git a/src/sparse_blas/sycl_helper.hpp b/src/sparse_blas/sycl_helper.hpp
index 1a055b405..074d1cd0e 100644
--- a/src/sparse_blas/sycl_helper.hpp
+++ b/src/sparse_blas/sycl_helper.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef _ONEMKL_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
-#define _ONEMKL_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
+#ifndef _ONEMATH_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
+#define _ONEMATH_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
 
 #if __has_include(<sycl/sycl.hpp>)
 #include <sycl/sycl.hpp>
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-namespace oneapi::mkl::sparse::detail {
+namespace oneapi::math::sparse::detail {
 
 /// Return whether a pointer is accessible on the host
 template <typename T>
@@ -75,6 +75,6 @@ inline sycl::event collapse_dependencies(sycl::queue& queue,
     });
 }
 
-} // namespace oneapi::mkl::sparse::detail
+} // namespace oneapi::math::sparse::detail
 
-#endif // _ONEMKL_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
+#endif // _ONEMATH_SRC_SPARSE_BLAS_SYCL_HELPER_HPP_
diff --git a/tests/README.md b/tests/README.md
index 3a8346057..6c3ee1584 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,11 +1,11 @@
-# oneMKL Interfaces Testing
+# oneMath Testing
 
 ## Overview
 Inside the `unit_tests` directory, there are domain-level directories which contain domain-specific tests, usually per function or per configuration.
 
-See [Building and Running Tests](https://oneapi-src.github.io/oneMKL/building_and_running_tests.html) documentation for more information about how to build and run the tests.
+See [Building and Running Tests](https://uxlfoundation.github.io/oneMath/building_and_running_tests.html) documentation for more information about how to build and run the tests.
 
 [GoogleTest](https://github.com/google/googletest) is used as the unit-testing framework.
 
 
-*Refer to `<path to onemkl>/deps/googletest/LICENSE` for GoogleTest license.*
+*Refer to `<path to onemath>/deps/googletest/LICENSE` for GoogleTest license.*
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index d250a03a0..13108dd32 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -110,7 +110,7 @@ foreach(domain ${TEST_TARGET_DOMAINS})
   if(BUILD_SHARED_LIBS)
     add_executable(test_main_${domain}_rt main_test.cpp)
     target_include_directories(test_main_${domain}_rt PUBLIC ${GTEST_INCLUDE_DIR})
-    if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+    if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
       target_compile_options(test_main_${domain}_rt PRIVATE -fsycl)
     endif()
     target_link_libraries(test_main_${domain}_rt PUBLIC
@@ -118,8 +118,8 @@ foreach(domain ${TEST_TARGET_DOMAINS})
       gtest_main
       ${CMAKE_DL_LIBS}
       ${${domain}_TEST_LINK}
-      ONEMKL::SYCL::SYCL
-      onemkl
+      ONEMATH::SYCL::SYCL
+      onemath
       ${${domain}_TEST_LIST_RT}
     )
     if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
@@ -128,73 +128,73 @@ foreach(domain ${TEST_TARGET_DOMAINS})
   endif()
 
   if(ENABLE_MKLCPU_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_mklcpu)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_mklcpu)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_mklcpu)
   endif()
 
   if(ENABLE_MKLGPU_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_mklgpu)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklgpu)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_mklgpu)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_mklgpu)
   endif()
 
   if(domain STREQUAL "blas" AND ENABLE_CUBLAS_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_cublas)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_cublas)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_cublas)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_cublas)
   endif()
 
   if(domain STREQUAL "blas" AND ENABLE_ROCBLAS_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_rocblas)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_rocblas)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_rocblas)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_rocblas)
   endif()
 
   if(domain STREQUAL "blas" AND ENABLE_NETLIB_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_netlib)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_netlib)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_netlib)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_netlib)
   endif()
 
   if(domain STREQUAL "blas" AND ENABLE_PORTBLAS_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_portblas)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_portblas)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_portblas)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_portblas)
   endif()
 
   if(domain STREQUAL "lapack" AND ENABLE_CUSOLVER_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_cusolver)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_cusolver)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_cusolver)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_cusolver)
   endif()
 
   if(domain STREQUAL "lapack" AND ENABLE_ROCSOLVER_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_rocsolver)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_rocsolver)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_rocsolver)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_rocsolver)
   endif()
   
   if(domain STREQUAL "rng" AND ENABLE_CURAND_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_curand)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_curand)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_curand)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_curand)
   endif()
 
   if(domain STREQUAL "rng" AND ENABLE_ROCRAND_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_rocrand)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_rocrand)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_rocrand)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_rocrand)
   endif()
 
   if(domain STREQUAL "dft" AND ENABLE_CUFFT_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_cufft)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_cufft)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_cufft)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_cufft)
   endif()
 
   if(domain STREQUAL "dft" AND ENABLE_ROCFFT_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_dft_rocfft)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_dft_rocfft)
+    add_dependencies(test_main_${domain}_ct onemath_dft_rocfft)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_dft_rocfft)
   endif()
 
   if(domain STREQUAL "dft" AND ENABLE_PORTFFT_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_dft_portfft)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_dft_portfft)
+    add_dependencies(test_main_${domain}_ct onemath_dft_portfft)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_dft_portfft)
   endif()
 
   if(domain STREQUAL "sparse_blas" AND ENABLE_CUSPARSE_BACKEND)
-    add_dependencies(test_main_${domain}_ct onemkl_${domain}_cusparse)
-    list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_cusparse)
+    add_dependencies(test_main_${domain}_ct onemath_${domain}_cusparse)
+    list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_cusparse)
   endif()
 
   target_link_libraries(test_main_${domain}_ct PUBLIC
@@ -202,13 +202,13 @@ foreach(domain ${TEST_TARGET_DOMAINS})
       gtest_main
       ${CMAKE_DL_LIBS}
       ${${domain}_TEST_LINK}
-      ${ONEMKL_LIBRARIES_${domain}}
-      ONEMKL::SYCL::SYCL
+      ${ONEMATH_LIBRARIES_${domain}}
+      ONEMATH::SYCL::SYCL
       ${${domain}_TEST_LIST_CT}
       ${${domain}_DEVICE_TEST_LIST_CT}
   )
   
-  if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+  if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
     target_link_options(test_main_${domain}_ct PUBLIC -fsycl-device-code-split=per_kernel)
   endif()
 
@@ -222,7 +222,7 @@ foreach(domain ${TEST_TARGET_DOMAINS})
 
   if(BUILD_SHARED_LIBS)
     set_target_properties(test_main_${domain}_rt
-      PROPERTIES BUILD_RPATH $<TARGET_FILE_DIR:onemkl>)
+      PROPERTIES BUILD_RPATH $<TARGET_FILE_DIR:onemath>)
   # Find individual tests within executable
     gtest_discover_tests(test_main_${domain}_rt
       PROPERTIES BUILD_RPATH ${CMAKE_BINARY_DIR}/lib
diff --git a/tests/unit_tests/blas/CMakeLists.txt b/tests/unit_tests/blas/CMakeLists.txt
index bd37d55f3..c80d0043a 100644
--- a/tests/unit_tests/blas/CMakeLists.txt
+++ b/tests/unit_tests/blas/CMakeLists.txt
@@ -19,8 +19,8 @@
 
 # Add compile definitions for the reference libraries
 add_compile_definitions(
-  ONEMKL_REF_BLAS_LIBNAME="${ONEMKL_REF_BLAS_LIBNAME}"
-  ONEMKL_REF_CBLAS_LIBNAME="${ONEMKL_REF_CBLAS_LIBNAME}")
+  ONEMATH_REF_BLAS_LIBNAME="${ONEMATH_REF_BLAS_LIBNAME}"
+  ONEMATH_REF_CBLAS_LIBNAME="${ONEMATH_REF_CBLAS_LIBNAME}")
 
 add_subdirectory(level1)
 add_subdirectory(level2)
diff --git a/tests/unit_tests/blas/batch/CMakeLists.txt b/tests/unit_tests/blas/batch/CMakeLists.txt
index a3c6cb252..eb4e45935 100644
--- a/tests/unit_tests/blas/batch/CMakeLists.txt
+++ b/tests/unit_tests/blas/batch/CMakeLists.txt
@@ -41,7 +41,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET blas_batch_rt SOURCES ${BATCH_SOURCES})
   else()
-    target_link_libraries(blas_batch_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(blas_batch_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -58,5 +58,5 @@ target_include_directories(blas_batch_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET blas_batch_ct SOURCES ${BATCH_SOURCES})
 else()
-  target_link_libraries(blas_batch_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(blas_batch_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp
index e311237a1..708921e9e 100644
--- a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha,
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha,
          int64_t batch_size) {
     // Prepare data.
     int64_t n, i;
@@ -105,27 +105,27 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::axpy_batch(main_queue, n, alpha, x_buffer, incx,
-                                                            stride_x, y_buffer, incy, stride_y,
-                                                            batch_size);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::axpy_batch(main_queue, n, alpha, x_buffer, incx,
+                                                             stride_x, y_buffer, incy, stride_y,
+                                                             batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::axpy_batch(main_queue, n, alpha, x_buffer, incx,
-                                                         stride_x, y_buffer, incy, stride_y,
-                                                         batch_size);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::axpy_batch(main_queue, n, alpha, x_buffer, incx,
+                                                          stride_x, y_buffer, incy, stride_y,
+                                                          batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy_batch, n,
                                         alpha, x_buffer, incx, stride_x, y_buffer, incy, stride_y,
                                         batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy_batch, n,
                                         alpha, x_buffer, incx, stride_x, y_buffer, incy, stride_y,
                                         batch_size);
                 break;
@@ -139,7 +139,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -160,7 +160,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
 }
 
 class AxpyBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpyBatchStrideTests, RealSinglePrecision) {
     float alpha = 2.0;
@@ -208,8 +208,8 @@ TEST_P(AxpyBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpyBatchStrideTestSuite, AxpyBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp
index 36f260e10..fa5646ac9 100644
--- a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha,
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha,
          int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -110,28 +110,28 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::axpy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::axpy_batch(
                     main_queue, n, alpha, &x[0], incx, stride_x, &y[0], incy, stride_y, batch_size,
                     dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::axpy_batch(main_queue, n, alpha, &x[0], incx,
-                                                                stride_x, &y[0], incy, stride_y,
-                                                                batch_size, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::axpy_batch(main_queue, n, alpha, &x[0], incx,
+                                                                 stride_x, &y[0], incy, stride_y,
+                                                                 batch_size, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy_batch, n,
                                         alpha, &x[0], incx, stride_x, &y[0], incy, stride_y,
                                         batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy_batch, n,
                                         alpha, &x[0], incx, stride_x, &y[0], incy, stride_y,
                                         batch_size, dependencies);
                 break;
@@ -146,7 +146,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -166,7 +166,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp
 }
 
 class AxpyBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpyBatchStrideUsmTests, RealSinglePrecision) {
     float alpha = 2.0;
@@ -214,8 +214,8 @@ TEST_P(AxpyBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpyBatchStrideUsmTestSuite, AxpyBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp
index a65367eb0..bc70aef9d 100644
--- a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -69,23 +69,24 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     std::vector<event> dependencies;
 
     // Prepare data.
-    int64_t* n = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+    int64_t* n =
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
     int64_t* incx =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
     int64_t* incy =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
-    fp* alpha = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+    fp* alpha = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt);
     int64_t* group_size =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
 
     if ((n == NULL) || (incx == NULL) || (incy == NULL) || (alpha == NULL) ||
         (group_size == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::free_shared(n, cxt);
-        oneapi::mkl::free_shared(incx, cxt);
-        oneapi::mkl::free_shared(incy, cxt);
-        oneapi::mkl::free_shared(alpha, cxt);
-        oneapi::mkl::free_shared(group_size, cxt);
+        oneapi::math::free_shared(n, cxt);
+        oneapi::math::free_shared(incx, cxt);
+        oneapi::math::free_shared(incy, cxt);
+        oneapi::math::free_shared(alpha, cxt);
+        oneapi::math::free_shared(group_size, cxt);
         return false;
     }
 
@@ -103,16 +104,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         total_batch_count += group_size[i];
     }
 
-    fp** x_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
-    fp** y_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+    fp** x_array =
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+    fp** y_array =
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
     fp** y_ref_array =
-        (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
 
     if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(x_array, cxt);
-        oneapi::mkl::free_shared(y_array, cxt);
-        oneapi::mkl::free_shared(y_ref_array, cxt);
+        oneapi::math::free_shared(x_array, cxt);
+        oneapi::math::free_shared(y_array, cxt);
+        oneapi::math::free_shared(y_ref_array, cxt);
         return false;
     }
     idx = 0;
@@ -121,11 +124,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             total_size_x = (1 + (n[i] - 1) * std::abs(incx[i]));
             total_size_y = (1 + (n[i] - 1) * std::abs(incy[i]));
             x_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt);
             y_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
             y_ref_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
             rand_vector(x_array[idx], n[i], incx[i]);
             rand_vector(y_array[idx], n[i], incy[i]);
             copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]);
@@ -154,13 +157,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::axpy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::axpy_batch(
                     main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count,
                     group_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::axpy_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::axpy_batch(
                     main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count,
                     group_size, dependencies);
                 break;
@@ -169,13 +172,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy_batch, n,
                                         alpha, (const fp**)x_array, incx, y_array, incy,
                                         group_count, group_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy_batch, n,
                                         alpha, (const fp**)x_array, incx, y_array, incy,
                                         group_count, group_size, dependencies);
                 break;
@@ -190,24 +193,24 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(y_array[idx], cxt);
-                oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(y_array[idx], cxt);
+                oneapi::math::free_shared(y_ref_array[idx], cxt);
                 idx++;
             }
         }
-        oneapi::mkl::free_shared(n, cxt);
-        oneapi::mkl::free_shared(incx, cxt);
-        oneapi::mkl::free_shared(incy, cxt);
-        oneapi::mkl::free_shared(alpha, cxt);
-        oneapi::mkl::free_shared(group_size, cxt);
-        oneapi::mkl::free_shared(x_array, cxt);
-        oneapi::mkl::free_shared(y_array, cxt);
-        oneapi::mkl::free_shared(y_ref_array, cxt);
+        oneapi::math::free_shared(n, cxt);
+        oneapi::math::free_shared(incx, cxt);
+        oneapi::math::free_shared(incy, cxt);
+        oneapi::math::free_shared(alpha, cxt);
+        oneapi::math::free_shared(group_size, cxt);
+        oneapi::math::free_shared(x_array, cxt);
+        oneapi::math::free_shared(y_array, cxt);
+        oneapi::math::free_shared(y_ref_array, cxt);
         return test_skipped;
     }
 
@@ -230,26 +233,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(x_array[idx], cxt);
-            oneapi::mkl::free_shared(y_array[idx], cxt);
-            oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+            oneapi::math::free_shared(x_array[idx], cxt);
+            oneapi::math::free_shared(y_array[idx], cxt);
+            oneapi::math::free_shared(y_ref_array[idx], cxt);
             idx++;
         }
     }
-    oneapi::mkl::free_shared(n, cxt);
-    oneapi::mkl::free_shared(incx, cxt);
-    oneapi::mkl::free_shared(incy, cxt);
-    oneapi::mkl::free_shared(alpha, cxt);
-    oneapi::mkl::free_shared(group_size, cxt);
-    oneapi::mkl::free_shared(x_array, cxt);
-    oneapi::mkl::free_shared(y_array, cxt);
-    oneapi::mkl::free_shared(y_ref_array, cxt);
+    oneapi::math::free_shared(n, cxt);
+    oneapi::math::free_shared(incx, cxt);
+    oneapi::math::free_shared(incy, cxt);
+    oneapi::math::free_shared(alpha, cxt);
+    oneapi::math::free_shared(group_size, cxt);
+    oneapi::math::free_shared(x_array, cxt);
+    oneapi::math::free_shared(y_array, cxt);
+    oneapi::math::free_shared(y_ref_array, cxt);
 
     return (int)good;
 }
 
 class AxpyBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpyBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -275,8 +278,8 @@ TEST_P(AxpyBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpyBatchUsmTestSuite, AxpyBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/copy_batch_stride.cpp b/tests/unit_tests/blas/batch/copy_batch_stride.cpp
index ff51e1c6d..48bec1291 100644
--- a/tests/unit_tests/blas/batch/copy_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/copy_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
     // Prepare data.
     int64_t n, i;
 
@@ -104,25 +104,25 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::copy_batch(main_queue, n, x_buffer, incx, stride_x,
-                                                            y_buffer, incy, stride_y, batch_size);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::copy_batch(
+                    main_queue, n, x_buffer, incx, stride_x, y_buffer, incy, stride_y, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::copy_batch(main_queue, n, x_buffer, incx, stride_x,
-                                                         y_buffer, incy, stride_y, batch_size);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::copy_batch(main_queue, n, x_buffer, incx, stride_x,
+                                                          y_buffer, incy, stride_y, batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy_batch, n,
                                         x_buffer, incx, stride_x, y_buffer, incy, stride_y,
                                         batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy_batch, n,
                                         x_buffer, incx, stride_x, y_buffer, incy, stride_y,
                                         batch_size);
                 break;
@@ -136,7 +136,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -157,7 +157,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
 }
 
 class CopyBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(CopyBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 15));
@@ -195,8 +195,8 @@ TEST_P(CopyBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(CopyBatchStrideTestSuite, CopyBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp
index 062054d55..291040948 100644
--- a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -109,28 +109,28 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::copy_batch(main_queue, n, &x[0], incx,
-                                                                   stride_x, &y[0], incy, stride_y,
-                                                                   batch_size, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::copy_batch(main_queue, n, &x[0], incx,
+                                                                    stride_x, &y[0], incy, stride_y,
+                                                                    batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::copy_batch(main_queue, n, &x[0], incx,
-                                                                stride_x, &y[0], incy, stride_y,
-                                                                batch_size, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::copy_batch(main_queue, n, &x[0], incx,
+                                                                 stride_x, &y[0], incy, stride_y,
+                                                                 batch_size, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy_batch, n,
                                         &x[0], incx, stride_x, &y[0], incy, stride_y, batch_size,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy_batch, n,
                                         &x[0], incx, stride_x, &y[0], incy, stride_y, batch_size,
                                         dependencies);
                 break;
@@ -145,7 +145,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -165,7 +165,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
 }
 
 class CopyBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(CopyBatchStrideUsmTests, RealSinglePrecision) {
     float alpha = 2.0;
@@ -207,8 +207,8 @@ TEST_P(CopyBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(CopyBatchStrideUsmTestSuite, CopyBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/copy_batch_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_usm.cpp
index ce051a046..69f93b1a7 100644
--- a/tests/unit_tests/blas/batch/copy_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/copy_batch_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -69,20 +69,21 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     std::vector<event> dependencies;
 
     // Prepare data.
-    int64_t* n = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+    int64_t* n =
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
     int64_t* incx =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
     int64_t* incy =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
     int64_t* group_size =
-        (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
+        (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt);
 
     if ((n == NULL) || (incx == NULL) || (incy == NULL) || (group_size == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::free_shared(n, cxt);
-        oneapi::mkl::free_shared(incx, cxt);
-        oneapi::mkl::free_shared(incy, cxt);
-        oneapi::mkl::free_shared(group_size, cxt);
+        oneapi::math::free_shared(n, cxt);
+        oneapi::math::free_shared(incx, cxt);
+        oneapi::math::free_shared(incy, cxt);
+        oneapi::math::free_shared(group_size, cxt);
         return false;
     }
 
@@ -99,16 +100,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         total_batch_count += group_size[i];
     }
 
-    fp** x_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
-    fp** y_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+    fp** x_array =
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+    fp** y_array =
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
     fp** y_ref_array =
-        (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
+        (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt);
 
     if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(x_array, cxt);
-        oneapi::mkl::free_shared(y_array, cxt);
-        oneapi::mkl::free_shared(y_ref_array, cxt);
+        oneapi::math::free_shared(x_array, cxt);
+        oneapi::math::free_shared(y_array, cxt);
+        oneapi::math::free_shared(y_ref_array, cxt);
         return false;
     }
     idx = 0;
@@ -117,11 +120,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             total_size_x = (1 + (n[i] - 1) * std::abs(incx[i]));
             total_size_y = (1 + (n[i] - 1) * std::abs(incy[i]));
             x_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt);
             y_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
             y_ref_array[idx] =
-                (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
+                (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt);
             rand_vector(x_array[idx], n[i], incx[i]);
             rand_vector(y_array[idx], n[i], incy[i]);
             copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]);
@@ -150,28 +153,28 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::copy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::copy_batch(
                     main_queue, n, (const fp**)x_array, incx, y_array, incy, group_count,
                     group_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::copy_batch(main_queue, n, (const fp**)x_array,
-                                                                incx, y_array, incy, group_count,
-                                                                group_size, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::copy_batch(main_queue, n, (const fp**)x_array,
+                                                                 incx, y_array, incy, group_count,
+                                                                 group_size, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy_batch, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy_batch, n,
                                         (const fp**)x_array, incx, y_array, incy, group_count,
                                         group_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy_batch, n,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy_batch, n,
                                         (const fp**)x_array, incx, y_array, incy, group_count,
                                         group_size, dependencies);
                 break;
@@ -186,23 +189,23 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(y_array[idx], cxt);
-                oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(y_array[idx], cxt);
+                oneapi::math::free_shared(y_ref_array[idx], cxt);
                 idx++;
             }
         }
-        oneapi::mkl::free_shared(n, cxt);
-        oneapi::mkl::free_shared(incx, cxt);
-        oneapi::mkl::free_shared(incy, cxt);
-        oneapi::mkl::free_shared(group_size, cxt);
-        oneapi::mkl::free_shared(x_array, cxt);
-        oneapi::mkl::free_shared(y_array, cxt);
-        oneapi::mkl::free_shared(y_ref_array, cxt);
+        oneapi::math::free_shared(n, cxt);
+        oneapi::math::free_shared(incx, cxt);
+        oneapi::math::free_shared(incy, cxt);
+        oneapi::math::free_shared(group_size, cxt);
+        oneapi::math::free_shared(x_array, cxt);
+        oneapi::math::free_shared(y_array, cxt);
+        oneapi::math::free_shared(y_ref_array, cxt);
         return test_skipped;
     }
 
@@ -225,25 +228,25 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(x_array[idx], cxt);
-            oneapi::mkl::free_shared(y_array[idx], cxt);
-            oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+            oneapi::math::free_shared(x_array[idx], cxt);
+            oneapi::math::free_shared(y_array[idx], cxt);
+            oneapi::math::free_shared(y_ref_array[idx], cxt);
             idx++;
         }
     }
-    oneapi::mkl::free_shared(n, cxt);
-    oneapi::mkl::free_shared(incx, cxt);
-    oneapi::mkl::free_shared(incy, cxt);
-    oneapi::mkl::free_shared(group_size, cxt);
-    oneapi::mkl::free_shared(x_array, cxt);
-    oneapi::mkl::free_shared(y_array, cxt);
-    oneapi::mkl::free_shared(y_ref_array, cxt);
+    oneapi::math::free_shared(n, cxt);
+    oneapi::math::free_shared(incx, cxt);
+    oneapi::math::free_shared(incy, cxt);
+    oneapi::math::free_shared(group_size, cxt);
+    oneapi::math::free_shared(x_array, cxt);
+    oneapi::math::free_shared(y_array, cxt);
+    oneapi::math::free_shared(y_ref_array, cxt);
 
     return (int)good;
 }
 
 class CopyBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(CopyBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -269,8 +272,8 @@ TEST_P(CopyBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(CopyBatchUsmTestSuite, CopyBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp
index 5e4bd82d8..9fa01c52a 100644
--- a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx,
          int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
@@ -61,7 +61,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     lda = 38;
     ldc = 42;
 
-    int x_len = (left_right == oneapi::mkl::side::right) ? n : m;
+    int x_len = (left_right == oneapi::math::side::right) ? n : m;
 
     int64_t stride_a, stride_x, stride_c;
     stride_x = x_len * std::abs(incx);
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 
     for (i = 0; i < batch_size; i++) {
         rand_vector(x.data() + stride_x * i, x_len, incx);
-        rand_matrix(A.data() + stride_a * i, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
-        rand_matrix(C.data() + stride_c * i, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(A.data() + stride_a * i, layout, oneapi::math::transpose::nontrans, m, n, lda);
+        rand_matrix(C.data() + stride_c * i, layout, oneapi::math::transpose::nontrans, m, n, ldc);
     }
 
     C_ref = C;
@@ -120,27 +120,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::dgmm_batch(main_queue, left_right, m, n, A_buffer,
-                                                            lda, stride_a, x_buffer, incx, stride_x,
-                                                            C_buffer, ldc, stride_c, batch_size);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::dgmm_batch(
+                    main_queue, left_right, m, n, A_buffer, lda, stride_a, x_buffer, incx, stride_x,
+                    C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::dgmm_batch(main_queue, left_right, m, n, A_buffer,
-                                                         lda, stride_a, x_buffer, incx, stride_x,
-                                                         C_buffer, ldc, stride_c, batch_size);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::dgmm_batch(main_queue, left_right, m, n, A_buffer,
+                                                          lda, stride_a, x_buffer, incx, stride_x,
+                                                          C_buffer, ldc, stride_c, batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dgmm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dgmm_batch,
                                         left_right, m, n, A_buffer, lda, stride_a, x_buffer, incx,
                                         stride_x, C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dgmm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dgmm_batch,
                                         left_right, m, n, A_buffer, lda, stride_a, x_buffer, incx,
                                         stride_x, C_buffer, ldc, stride_c, batch_size);
                 break;
@@ -154,7 +154,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -176,76 +176,76 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class DgmmBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DgmmBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, 2, 5));
+                                  oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, -2, 5));
+                                  oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, 1, 5));
+                                  oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, 2, 5));
+                                  oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, -2, 5));
+                                  oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, 1, 5));
+                                  oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, 2, 5));
+                                   oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, -2, 5));
+                                   oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, 1, 5));
+                                   oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, 2, 5));
+                                   oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, -2, 5));
+                                   oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, 1, 5));
+                                   oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, 2, 5));
+                                                oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, -2, 5));
+                                                oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, 1, 5));
+                                                oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, 2, 5));
+                                                oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, -2, 5));
+                                                oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, 1, 5));
+                                                oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, 2, 5));
+                                                 oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, -2, 5));
+                                                 oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, 1, 5));
+                                                 oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, 2, 5));
+                                                 oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, -2, 5));
+                                                 oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, 1, 5));
+                                                 oneapi::math::side::left, 1, 5));
 }
 
 INSTANTIATE_TEST_SUITE_P(DgmmBatchStrideTestSuite, DgmmBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp
index 7b6389b0f..c486ac90e 100644
--- a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx,
          int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -80,7 +80,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     lda = 38;
     ldc = 42;
 
-    int x_len = (left_right == oneapi::mkl::side::right) ? n : m;
+    int x_len = (left_right == oneapi::math::side::right) ? n : m;
 
     int64_t stride_a, stride_x, stride_c;
     stride_x = x_len * std::abs(incx);
@@ -96,8 +96,8 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 
     for (i = 0; i < batch_size; i++) {
         rand_vector(&x[stride_x * i], x_len, incx);
-        rand_matrix(&A[stride_a * i], layout, oneapi::mkl::transpose::nontrans, m, n, lda);
-        rand_matrix(&C[stride_c * i], layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(&A[stride_a * i], layout, oneapi::math::transpose::nontrans, m, n, lda);
+        rand_matrix(&C[stride_c * i], layout, oneapi::math::transpose::nontrans, m, n, ldc);
     }
 
     C_ref.resize(C.size());
@@ -125,13 +125,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::dgmm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::dgmm_batch(
                     main_queue, left_right, m, n, &A[0], lda, stride_a, &x[0], incx, stride_x,
                     &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::dgmm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::dgmm_batch(
                     main_queue, left_right, m, n, &A[0], lda, stride_a, &x[0], incx, stride_x,
                     &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
@@ -140,13 +140,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dgmm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dgmm_batch,
                                         left_right, m, n, &A[0], lda, stride_a, &x[0], incx,
                                         stride_x, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dgmm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dgmm_batch,
                                         left_right, m, n, &A[0], lda, stride_a, &x[0], incx,
                                         stride_x, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
@@ -161,7 +161,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -181,76 +181,76 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class DgmmBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DgmmBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, 2, 5));
+                                  oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, -2, 5));
+                                  oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, 1, 5));
+                                  oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, 2, 5));
+                                  oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, -2, 5));
+                                  oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, 1, 5));
+                                  oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, 2, 5));
+                                   oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, -2, 5));
+                                   oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, 1, 5));
+                                   oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, 2, 5));
+                                   oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, -2, 5));
+                                   oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, 1, 5));
+                                   oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, 2, 5));
+                                                oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, -2, 5));
+                                                oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, 1, 5));
+                                                oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, 2, 5));
+                                                oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, -2, 5));
+                                                oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, 1, 5));
+                                                oneapi::math::side::left, 1, 5));
 }
 
 TEST_P(DgmmBatchStrideUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, 2, 5));
+                                                 oneapi::math::side::right, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, -2, 5));
+                                                 oneapi::math::side::right, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, 1, 5));
+                                                 oneapi::math::side::right, 1, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, 2, 5));
+                                                 oneapi::math::side::left, 2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, -2, 5));
+                                                 oneapi::math::side::left, -2, 5));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, 1, 5));
+                                                 oneapi::math::side::left, 1, 5));
 }
 
 INSTANTIATE_TEST_SUITE_P(DgmmBatchStrideUsmTestSuite, DgmmBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp
index 87b127358..3df3bffd2 100644
--- a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp
@@ -30,10 +30,10 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include "allocator_helper.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), lda(uaint), incx(uaint), ldc(uaint),
         group_size(uaint);
 
-    auto uaside = usm_allocator<oneapi::mkl::side, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::side, decltype(uaside)> left_right(uaside);
+    auto uaside = usm_allocator<oneapi::math::side, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::side, decltype(uaside)> left_right(uaside);
 
     m.resize(group_count);
     n.resize(group_count);
@@ -98,7 +98,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         incx[i] = -3 + std::rand() % 6;
         incx[i] = (incx[i] == 0) ? 3 : incx[i];
         ldc[i] = std::max(m[i], n[i]);
-        left_right[i] = (oneapi::mkl::side)(std::rand() % 2);
+        left_right[i] = (oneapi::math::side)(std::rand() % 2);
         total_batch_count += group_size[i];
     }
 
@@ -111,19 +111,21 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     idx = 0;
     for (i = 0; i < group_count; i++) {
-        size_a = (layout == oneapi::mkl::layout::col_major) ? lda[i] * n[i] : lda[i] * m[i];
-        x_len = (left_right[i] == oneapi::mkl::side::R) ? n[i] : m[i];
+        size_a = (layout == oneapi::math::layout::col_major) ? lda[i] * n[i] : lda[i] * m[i];
+        x_len = (left_right[i] == oneapi::math::side::R) ? n[i] : m[i];
         size_x = 1 + (x_len - 1) * std::abs(incx[i]);
-        size_c = (layout == oneapi::mkl::layout::col_major) ? ldc[i] * n[i] : ldc[i] * m[i];
+        size_c = (layout == oneapi::math::layout::col_major) ? ldc[i] * n[i] : ldc[i] * m[i];
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
-            x_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt);
-            c_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
-            c_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
-            rand_matrix(a_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], lda[i]);
+            a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
+            x_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt);
+            c_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
+            c_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
+            rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
+                        lda[i]);
             rand_vector(x_array[idx], x_len, incx[i]);
-            rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i]);
-            copy_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i],
+            rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
+                        ldc[i]);
+            copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i],
                         c_ref_array[idx]);
             idx++;
         }
@@ -131,33 +133,33 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     // Call reference DGMM_BATCH.
     using fp_ref = typename ref_type_info<fp>::type;
-    int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* incx_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
+    int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* incx_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
 
     CBLAS_SIDE* left_right_ref =
-        (CBLAS_SIDE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count);
+        (CBLAS_SIDE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count);
 
     if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) ||
         (ldc_ref == NULL) || (left_right_ref == NULL) || (group_size_ref == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(incx_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(left_right_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(incx_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(left_right_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -186,14 +188,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::dgmm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::dgmm_batch(
                     main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0],
                     (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count,
                     &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::dgmm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::dgmm_batch(
                     main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0],
                     (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count,
                     &group_size[0], dependencies);
@@ -203,14 +205,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dgmm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dgmm_batch,
                                         &left_right[0], &m[0], &n[0], (const fp**)&a_array[0],
                                         &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0],
                                         &ldc[0], group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dgmm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dgmm_batch,
                                         &left_right[0], &m[0], &n[0], (const fp**)&a_array[0],
                                         &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0],
                                         &ldc[0], group_count, &group_size[0], dependencies);
@@ -226,21 +228,21 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(incx_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(left_right_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(incx_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(left_right_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -262,20 +264,20 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         }
     }
 
-    oneapi::mkl::aligned_free(m_ref);
-    oneapi::mkl::aligned_free(n_ref);
-    oneapi::mkl::aligned_free(lda_ref);
-    oneapi::mkl::aligned_free(incx_ref);
-    oneapi::mkl::aligned_free(ldc_ref);
-    oneapi::mkl::aligned_free(left_right_ref);
-    oneapi::mkl::aligned_free(group_size_ref);
+    oneapi::math::aligned_free(m_ref);
+    oneapi::math::aligned_free(n_ref);
+    oneapi::math::aligned_free(lda_ref);
+    oneapi::math::aligned_free(incx_ref);
+    oneapi::math::aligned_free(ldc_ref);
+    oneapi::math::aligned_free(left_right_ref);
+    oneapi::math::aligned_free(group_size_ref);
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(x_array[idx], cxt);
-            oneapi::mkl::free_shared(c_array[idx], cxt);
-            oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(x_array[idx], cxt);
+            oneapi::math::free_shared(c_array[idx], cxt);
+            oneapi::math::free_shared(c_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -284,7 +286,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class DgmmBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DgmmBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -310,8 +312,8 @@ TEST_P(DgmmBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DgmmBatchUsmTestSuite, DgmmBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp
index 3dad54f33..50e90ccbb 100644
--- a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ta, typename Tb, typename Tc, typename Ts>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n, k;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     Ts alpha, beta;
     int64_t i, tmp;
 
@@ -70,31 +70,31 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         (std::is_same<Ts, std::complex<double>>::value)) {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transa = oneapi::mkl::transpose::conjtrans;
+            transa = oneapi::math::transpose::conjtrans;
         else
-            transa = (oneapi::mkl::transpose)tmp;
+            transa = (oneapi::math::transpose)tmp;
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transb = oneapi::mkl::transpose::conjtrans;
+            transb = oneapi::math::transpose::conjtrans;
         else
-            transb = (oneapi::mkl::transpose)tmp;
+            transb = (oneapi::math::transpose)tmp;
     }
     else {
-        transa = (oneapi::mkl::transpose)(std::rand() % 2);
-        transb = (oneapi::mkl::transpose)(std::rand() % 2);
+        transa = (oneapi::math::transpose)(std::rand() % 2);
+        transb = (oneapi::math::transpose)(std::rand() % 2);
     }
 
     int64_t stride_a, stride_b, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * k : lda * m;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * k;
+        case oneapi::math::layout::col_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * k : lda * m;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * k;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * k;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * k : ldb * n;
+        case oneapi::math::layout::row_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * k;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * k : ldb * n;
             stride_c = ldc * m;
             break;
         default: break;
@@ -110,7 +110,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     for (i = 0; i < batch_size; i++) {
         rand_matrix(A.data() + stride_a * i, layout, transa, m, k, lda);
         rand_matrix(B.data() + stride_b * i, layout, transb, k, n, ldb);
-        rand_matrix(C.data() + stride_c * i, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(C.data() + stride_c * i, layout, oneapi::math::transpose::nontrans, m, n, ldc);
     }
 
     for (size_t i = 0; i < A.size(); ++i) {
@@ -167,13 +167,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemm_batch(
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemm_batch(
                     main_queue, transa, transb, m, n, k, alpha, A_buffer, lda, stride_a, B_buffer,
                     ldb, stride_b, beta, C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemm_batch(
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemm_batch(
                     main_queue, transa, transb, m, n, k, alpha, A_buffer, lda, stride_a, B_buffer,
                     ldb, stride_b, beta, C_buffer, ldc, stride_c, batch_size);
                 break;
@@ -181,14 +181,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_batch,
                                         transa, transb, m, n, k, alpha, A_buffer, lda, stride_a,
                                         B_buffer, ldb, stride_b, beta, C_buffer, ldc, stride_c,
                                         batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_batch,
                                         transa, transb, m, n, k, alpha, A_buffer, lda, stride_a,
                                         B_buffer, ldb, stride_b, beta, C_buffer, ldc, stride_c,
                                         batch_size);
@@ -204,7 +204,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -223,7 +223,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         C_cast_ref[i] = C_ref[i];
     }
     auto C_accessor = C_buffer.get_host_access(read_only);
-    bool good = check_almost_equal_matrix(C_accessor, C_cast_ref, oneapi::mkl::layout::col_major,
+    bool good = check_almost_equal_matrix(C_accessor, C_cast_ref, oneapi::math::layout::col_major,
                                           stride_c * batch_size, 1, stride_c * batch_size,
                                           error_mag, std::cout);
 
@@ -231,7 +231,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 }
 
 class GemmBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmBatchStrideTests, RealHalfPrecision) {
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half, sycl::half, sycl::half>(
@@ -281,8 +281,8 @@ TEST_P(GemmBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemmBatchStrideTestSuite, GemmBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp
index 12a5a4f61..1f46e1d68 100644
--- a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ta, typename Tb, typename Tc, typename Ts>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,7 +71,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n, k;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     Ts alpha, beta;
 
     int64_t i, tmp;
@@ -89,31 +89,31 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         (std::is_same<Ts, std::complex<double>>::value)) {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transa = oneapi::mkl::transpose::conjtrans;
+            transa = oneapi::math::transpose::conjtrans;
         else
-            transa = (oneapi::mkl::transpose)tmp;
+            transa = (oneapi::math::transpose)tmp;
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transb = oneapi::mkl::transpose::conjtrans;
+            transb = oneapi::math::transpose::conjtrans;
         else
-            transb = (oneapi::mkl::transpose)tmp;
+            transb = (oneapi::math::transpose)tmp;
     }
     else {
-        transa = (oneapi::mkl::transpose)(std::rand() % 2);
-        transb = (oneapi::mkl::transpose)(std::rand() % 2);
+        transa = (oneapi::math::transpose)(std::rand() % 2);
+        transb = (oneapi::math::transpose)(std::rand() % 2);
     }
 
     int64_t stride_a, stride_b, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * k : lda * m;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * k;
+        case oneapi::math::layout::col_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * k : lda * m;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * k;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * k;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * k : ldb * n;
+        case oneapi::math::layout::row_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * k;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * k : ldb * n;
             stride_c = ldc * m;
             break;
         default: break;
@@ -136,17 +136,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     C_ref.resize(stride_c * batch_size);
     C_cast_ref.resize(stride_c * batch_size);
 
-    Ta** a_array = (Ta**)oneapi::mkl::malloc_shared(64, sizeof(Ta*) * batch_size, *dev, cxt);
-    Tb** b_array = (Tb**)oneapi::mkl::malloc_shared(64, sizeof(Tb*) * batch_size, *dev, cxt);
-    Tc** c_array = (Tc**)oneapi::mkl::malloc_shared(64, sizeof(Tc*) * batch_size, *dev, cxt);
-    Ts** c_ref_array = (Ts**)oneapi::mkl::malloc_shared(64, sizeof(Ts*) * batch_size, *dev, cxt);
+    Ta** a_array = (Ta**)oneapi::math::malloc_shared(64, sizeof(Ta*) * batch_size, *dev, cxt);
+    Tb** b_array = (Tb**)oneapi::math::malloc_shared(64, sizeof(Tb*) * batch_size, *dev, cxt);
+    Tc** c_array = (Tc**)oneapi::math::malloc_shared(64, sizeof(Tc*) * batch_size, *dev, cxt);
+    Ts** c_ref_array = (Ts**)oneapi::math::malloc_shared(64, sizeof(Ts*) * batch_size, *dev, cxt);
 
     if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return false;
     }
 
@@ -157,17 +157,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         c_ref_array[i] = &C_ref[i * stride_c];
     }
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size);
-    rand_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size);
-    copy_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size, A_ref);
-    copy_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size, B_ref);
-    copy_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size, C_ref);
 
     // Call reference GEMM_BATCH_STRIDE.
@@ -193,13 +193,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemm_batch(
                     main_queue, transa, transb, m, n, k, alpha, &A[0], lda, stride_a, &B[0], ldb,
                     stride_b, beta, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemm_batch(
                     main_queue, transa, transb, m, n, k, alpha, &A[0], lda, stride_a, &B[0], ldb,
                     stride_b, beta, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
@@ -208,14 +208,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         done.wait_and_throw();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_batch,
                                         transa, transb, m, n, k, alpha, &A[0], lda, stride_a, &B[0],
                                         ldb, stride_b, beta, &C[0], ldc, stride_c, batch_size,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_batch,
                                         transa, transb, m, n, k, alpha, &A[0], lda, stride_a, &B[0],
                                         ldb, stride_b, beta, &C[0], ldc, stride_c, batch_size,
                                         dependencies);
@@ -231,11 +231,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return test_skipped;
     }
 
@@ -253,20 +253,20 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     for (size_t i = 0; i < C_ref.size(); ++i) {
         C_cast_ref[i] = C_ref[i];
     }
-    bool good = check_almost_equal_matrix(C, C_cast_ref, oneapi::mkl::layout::col_major,
+    bool good = check_almost_equal_matrix(C, C_cast_ref, oneapi::math::layout::col_major,
                                           stride_c * batch_size, 1, stride_c * batch_size,
                                           error_mag, std::cout);
 
-    oneapi::mkl::free_shared(a_array, cxt);
-    oneapi::mkl::free_shared(b_array, cxt);
-    oneapi::mkl::free_shared(c_array, cxt);
-    oneapi::mkl::free_shared(c_ref_array, cxt);
+    oneapi::math::free_shared(a_array, cxt);
+    oneapi::math::free_shared(b_array, cxt);
+    oneapi::math::free_shared(c_array, cxt);
+    oneapi::math::free_shared(c_ref_array, cxt);
 
     return (int)good;
 }
 
 class GemmBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmBatchStrideUsmTests, RealHalfPrecision) {
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half, sycl::half, sycl::half>(
@@ -316,8 +316,8 @@ TEST_P(GemmBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemmBatchStrideUsmTestSuite, GemmBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp
index a78bbb26f..8c4fd6a37 100644
--- a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp
@@ -30,10 +30,10 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include "allocator_helper.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ta, typename Tb, typename Tc, typename Ts>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), k(uaint), lda(uaint), ldb(uaint),
         ldc(uaint), group_size(uaint);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> transa(uatranspose), transb(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> transa(uatranspose), transb(uatranspose);
 
     auto uaTs = usm_allocator<Ts, usm::alloc::shared, 64>(cxt, *dev);
     vector<Ts, decltype(uaTs)> alpha(uaTs), beta(uaTs);
@@ -110,18 +110,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             (std::is_same<Ts, std::complex<double>>::value)) {
             tmp = std::rand() % 3;
             if (tmp == 2)
-                transa[i] = oneapi::mkl::transpose::conjtrans;
+                transa[i] = oneapi::math::transpose::conjtrans;
             else
-                transa[i] = (oneapi::mkl::transpose)tmp;
+                transa[i] = (oneapi::math::transpose)tmp;
             tmp = std::rand() % 3;
             if (tmp == 2)
-                transb[i] = oneapi::mkl::transpose::conjtrans;
+                transb[i] = oneapi::math::transpose::conjtrans;
             else
-                transb[i] = (oneapi::mkl::transpose)tmp;
+                transb[i] = (oneapi::math::transpose)tmp;
         }
         else {
-            transa[i] = (oneapi::mkl::transpose)(std::rand() % 2);
-            transb[i] = (oneapi::mkl::transpose)(std::rand() % 2);
+            transa[i] = (oneapi::math::transpose)(std::rand() % 2);
+            transb[i] = (oneapi::math::transpose)(std::rand() % 2);
         }
         total_batch_count += group_size[i];
     }
@@ -145,33 +145,34 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                size_a = lda[i] * ((transa[i] == oneapi::mkl::transpose::nontrans) ? k[i] : m[i]);
-                size_b = ldb[i] * ((transb[i] == oneapi::mkl::transpose::nontrans) ? n[i] : k[i]);
+            case oneapi::math::layout::col_major:
+                size_a = lda[i] * ((transa[i] == oneapi::math::transpose::nontrans) ? k[i] : m[i]);
+                size_b = ldb[i] * ((transb[i] == oneapi::math::transpose::nontrans) ? n[i] : k[i]);
                 size_c = ldc[i] * n[i];
                 break;
-            case oneapi::mkl::layout::row_major:
-                size_a = lda[i] * ((transa[i] == oneapi::mkl::transpose::nontrans) ? m[i] : k[i]);
-                size_b = ldb[i] * ((transb[i] == oneapi::mkl::transpose::nontrans) ? k[i] : n[i]);
+            case oneapi::math::layout::row_major:
+                size_a = lda[i] * ((transa[i] == oneapi::math::transpose::nontrans) ? m[i] : k[i]);
+                size_b = ldb[i] * ((transb[i] == oneapi::math::transpose::nontrans) ? k[i] : n[i]);
                 size_c = ldc[i] * m[i];
                 break;
             default: break;
         }
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (Ta*)oneapi::mkl::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt);
-            b_array[idx] = (Tb*)oneapi::mkl::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt);
-            c_array[idx] = (Tc*)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt);
-            a_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt);
-            b_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_b, *dev, cxt);
+            a_array[idx] = (Ta*)oneapi::math::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt);
+            b_array[idx] = (Tb*)oneapi::math::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt);
+            c_array[idx] = (Tc*)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt);
+            a_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt);
+            b_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_b, *dev, cxt);
             c_cast_ref_array[idx] =
-                (Tc*)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt);
-            c_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt);
+                (Tc*)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt);
+            c_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt);
             rand_matrix(a_array[idx], layout, transa[i], m[i], k[i], lda[i]);
             rand_matrix(b_array[idx], layout, transb[i], k[i], n[i], ldb[i]);
-            rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i]);
+            rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
+                        ldc[i]);
             copy_matrix(a_array[idx], layout, transa[i], m[i], k[i], lda[i], a_ref_array[idx]);
             copy_matrix(b_array[idx], layout, transb[i], k[i], n[i], ldb[i], b_ref_array[idx]);
-            copy_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i],
+            copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i],
                         c_ref_array[idx]);
             idx++;
         }
@@ -179,42 +180,42 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     // Call reference GEMM_BATCH.
     using fp_ref = typename ref_type_info<Ts>::type;
-    int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* k_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* ldb_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
+    int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* k_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* ldb_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
 
     CBLAS_TRANSPOSE* transa_ref =
-        (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
+        (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
     CBLAS_TRANSPOSE* transb_ref =
-        (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
+        (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
 
     if ((m_ref == NULL) || (n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) ||
         (ldb_ref == NULL) || (ldc_ref == NULL) || (transa_ref == NULL) || (transb_ref == NULL) ||
         (group_size_ref == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(k_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldb_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(transa_ref);
-        oneapi::mkl::aligned_free(transb_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(k_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldb_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(transa_ref);
+        oneapi::math::aligned_free(transb_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(b_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(a_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(b_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(c_cast_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(b_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(a_ref_array[idx], cxt);
+                oneapi::math::free_shared(b_ref_array[idx], cxt);
+                oneapi::math::free_shared(c_cast_ref_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -247,14 +248,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemm_batch(
                     main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0],
                     (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0],
                     &c_array[0], &ldc[0], group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemm_batch(
                     main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0],
                     (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0],
                     &c_array[0], &ldc[0], group_count, &group_size[0], dependencies);
@@ -264,15 +265,15 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait_and_throw();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_batch,
                                         &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0],
                                         (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0],
                                         &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count,
                                         &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_batch,
                                         &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0],
-                                        (const Ta**)&a_array[0], &lda[0], (const Ta**)&b_array[0],
+                                        (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0],
                                         &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count,
@@ -289,26 +290,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(k_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldb_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(transa_ref);
-        oneapi::mkl::aligned_free(transb_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(k_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldb_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(transa_ref);
+        oneapi::math::aligned_free(transb_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(b_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(a_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(b_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(c_cast_ref_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(b_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(a_ref_array[idx], cxt);
+                oneapi::math::free_shared(b_ref_array[idx], cxt);
+                oneapi::math::free_shared(c_cast_ref_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -330,32 +331,32 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             if (std::is_same_v<Tc, int32_t>)
                 error_mag = 1;
 
-            copy_matrix(c_ref_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i],
+            copy_matrix(c_ref_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
                         ldc[i], c_cast_ref_array[idx]);
             good = good && check_almost_equal_matrix(c_array[idx], c_cast_ref_array[idx], layout,
                                                      m[i], n[i], ldc[i], error_mag, std::cout);
             idx++;
         }
     }
-    oneapi::mkl::aligned_free(m_ref);
-    oneapi::mkl::aligned_free(n_ref);
-    oneapi::mkl::aligned_free(k_ref);
-    oneapi::mkl::aligned_free(lda_ref);
-    oneapi::mkl::aligned_free(ldb_ref);
-    oneapi::mkl::aligned_free(ldc_ref);
-    oneapi::mkl::aligned_free(transa_ref);
-    oneapi::mkl::aligned_free(transb_ref);
-    oneapi::mkl::aligned_free(group_size_ref);
+    oneapi::math::aligned_free(m_ref);
+    oneapi::math::aligned_free(n_ref);
+    oneapi::math::aligned_free(k_ref);
+    oneapi::math::aligned_free(lda_ref);
+    oneapi::math::aligned_free(ldb_ref);
+    oneapi::math::aligned_free(ldc_ref);
+    oneapi::math::aligned_free(transa_ref);
+    oneapi::math::aligned_free(transb_ref);
+    oneapi::math::aligned_free(group_size_ref);
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(b_array[idx], cxt);
-            oneapi::mkl::free_shared(c_array[idx], cxt);
-            oneapi::mkl::free_shared(a_ref_array[idx], cxt);
-            oneapi::mkl::free_shared(b_ref_array[idx], cxt);
-            oneapi::mkl::free_shared(c_cast_ref_array[idx], cxt);
-            oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(b_array[idx], cxt);
+            oneapi::math::free_shared(c_array[idx], cxt);
+            oneapi::math::free_shared(a_ref_array[idx], cxt);
+            oneapi::math::free_shared(b_ref_array[idx], cxt);
+            oneapi::math::free_shared(c_cast_ref_array[idx], cxt);
+            oneapi::math::free_shared(c_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -364,7 +365,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class GemmBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmBatchUsmTests, RealHalfPrecision) {
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half, sycl::half, sycl::half>(
@@ -414,8 +415,8 @@ TEST_P(GemmBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemmBatchUsmTestSuite, GemmBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp
index f50686c13..bed54f5e7 100644
--- a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda;
-    oneapi::mkl::transpose transa;
+    oneapi::math::transpose transa;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -64,14 +64,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     beta = rand_scalar<fp>();
 
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        transa = (oneapi::mkl::transpose)(std::rand() % 2);
+        transa = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transa = oneapi::mkl::transpose::conjtrans;
+            transa = oneapi::math::transpose::conjtrans;
         else
-            transa = (oneapi::mkl::transpose)tmp;
+            transa = (oneapi::math::transpose)tmp;
     }
 
     int x_len = outer_dimension(transa, m, n);
@@ -88,7 +88,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     for (i = 0; i < batch_size; i++) {
         rand_vector(x.data() + stride_x * i, x_len, incx);
         rand_vector(y.data() + stride_y * i, y_len, incy);
-        rand_matrix(A.data() + stride_a * i, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+        rand_matrix(A.data() + stride_a * i, layout, oneapi::math::transpose::nontrans, m, n, lda);
     }
 
     y_ref = y;
@@ -135,13 +135,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemv_batch(
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemv_batch(
                     main_queue, transa, m, n, alpha, A_buffer, lda, stride_a, x_buffer, incx,
                     stride_x, beta, y_buffer, incy, stride_y, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemv_batch(
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemv_batch(
                     main_queue, transa, m, n, alpha, A_buffer, lda, stride_a, x_buffer, incx,
                     stride_x, beta, y_buffer, incy, stride_y, batch_size);
                 break;
@@ -149,13 +149,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv_batch,
                                         transa, m, n, alpha, A_buffer, lda, stride_a, x_buffer,
                                         incx, stride_x, beta, y_buffer, incy, stride_y, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemv_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemv_batch,
                                         transa, m, n, alpha, A_buffer, lda, stride_a, x_buffer,
                                         incx, stride_x, beta, y_buffer, incy, stride_y, batch_size);
                 break;
@@ -169,7 +169,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -191,7 +191,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
 }
 
 class GemvBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemvBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5));
@@ -225,8 +225,8 @@ TEST_P(GemvBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemvBatchStrideTestSuite, GemvBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp
index a61d7d318..e009f1758 100644
--- a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,7 +71,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     // Prepare data.
     int64_t m, n;
     int64_t lda;
-    oneapi::mkl::transpose transa;
+    oneapi::math::transpose transa;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -83,14 +83,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     beta = rand_scalar<fp>();
 
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        transa = (oneapi::mkl::transpose)(std::rand() % 2);
+        transa = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            transa = oneapi::mkl::transpose::conjtrans;
+            transa = oneapi::math::transpose::conjtrans;
         else
-            transa = (oneapi::mkl::transpose)tmp;
+            transa = (oneapi::math::transpose)tmp;
     }
 
     int x_len = outer_dimension(transa, m, n);
@@ -111,7 +111,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     for (i = 0; i < batch_size; i++) {
         rand_vector(&x[stride_x * i], x_len, incx);
         rand_vector(&y[stride_y * i], y_len, incy);
-        rand_matrix(&A[stride_a * i], layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+        rand_matrix(&A[stride_a * i], layout, oneapi::math::transpose::nontrans, m, n, lda);
     }
 
     y_ref.resize(y.size());
@@ -139,13 +139,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemv_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemv_batch(
                     main_queue, transa, m, n, alpha, &A[0], lda, stride_a, &x[0], incx, stride_x,
                     beta, &y[0], incy, stride_y, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemv_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemv_batch(
                     main_queue, transa, m, n, alpha, &A[0], lda, stride_a, &x[0], incx, stride_x,
                     beta, &y[0], incy, stride_y, batch_size, dependencies);
                 break;
@@ -154,14 +154,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv_batch,
                                         transa, m, n, alpha, &A[0], lda, stride_a, &x[0], incx,
                                         stride_x, beta, &y[0], incy, stride_y, batch_size,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemv_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemv_batch,
                                         transa, m, n, alpha, &A[0], lda, stride_a, &x[0], incx,
                                         stride_x, beta, &y[0], incy, stride_y, batch_size,
                                         dependencies);
@@ -177,7 +177,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -197,7 +197,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in
 }
 
 class GemvBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemvBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5));
@@ -231,8 +231,8 @@ TEST_P(GemvBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemvBatchStrideUsmTestSuite, GemvBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp
index 2d257d0be..71925b26a 100644
--- a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp
@@ -30,10 +30,10 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include "allocator_helper.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), lda(uaint), incx(uaint), incy(uaint),
         group_size(uaint);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> transa(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> transa(uatranspose);
 
     auto uafp = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(uafp)> alpha(uafp), beta(uafp);
@@ -107,14 +107,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         alpha[i] = rand_scalar<fp>();
         beta[i] = rand_scalar<fp>();
         if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-            transa[i] = (oneapi::mkl::transpose)(std::rand() % 2);
+            transa[i] = (oneapi::math::transpose)(std::rand() % 2);
         }
         else {
             tmp = std::rand() % 3;
             if (tmp == 2)
-                transa[i] = oneapi::mkl::transpose::conjtrans;
+                transa[i] = oneapi::math::transpose::conjtrans;
             else
-                transa[i] = (oneapi::mkl::transpose)tmp;
+                transa[i] = (oneapi::math::transpose)tmp;
         }
         total_batch_count += group_size[i];
     }
@@ -128,17 +128,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     idx = 0;
     for (i = 0; i < group_count; i++) {
-        size_a = (layout == oneapi::mkl::layout::col_major) ? lda[i] * n[i] : lda[i] * m[i];
-        x_len = (transa[i] == oneapi::mkl::transpose::nontrans) ? n[i] : m[i];
-        y_len = (transa[i] == oneapi::mkl::transpose::nontrans) ? m[i] : n[i];
+        size_a = (layout == oneapi::math::layout::col_major) ? lda[i] * n[i] : lda[i] * m[i];
+        x_len = (transa[i] == oneapi::math::transpose::nontrans) ? n[i] : m[i];
+        y_len = (transa[i] == oneapi::math::transpose::nontrans) ? m[i] : n[i];
         size_x = 1 + (x_len - 1) * std::abs(incx[i]);
         size_y = 1 + (y_len - 1) * std::abs(incy[i]);
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
-            x_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt);
-            y_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt);
-            y_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt);
-            rand_matrix(a_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], lda[i]);
+            a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
+            x_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt);
+            y_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt);
+            y_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt);
+            rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
+                        lda[i]);
             rand_vector(x_array[idx], x_len, incx[i]);
             rand_vector(y_array[idx], y_len, incy[i]);
             copy_vector(y_array[idx], y_len, incy[i], y_ref_array[idx]);
@@ -148,33 +149,33 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     // Call reference GEMV_BATCH.
     using fp_ref = typename ref_type_info<fp>::type;
-    int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* incx_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* incy_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
+    int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* incx_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* incy_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
 
     CBLAS_TRANSPOSE* transa_ref =
-        (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
+        (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
 
     if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) ||
         (incy_ref == NULL) || (transa_ref == NULL) || (group_size_ref == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(incx_ref);
-        oneapi::mkl::aligned_free(incy_ref);
-        oneapi::mkl::aligned_free(transa_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(incx_ref);
+        oneapi::math::aligned_free(incy_ref);
+        oneapi::math::aligned_free(transa_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(y_array[idx], cxt);
-                oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(y_array[idx], cxt);
+                oneapi::math::free_shared(y_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -203,14 +204,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemv_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemv_batch(
                     main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0],
                     &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0],
                     group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemv_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemv_batch(
                     main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0],
                     &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0],
                     group_count, &group_size[0], dependencies);
@@ -220,16 +221,16 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv_batch,
                                         &transa[0], &m[0], &n[0], &alpha[0],
                                         (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0],
                                         &incx[0], &beta[0], &y_array[0], &incy[0], group_count,
                                         &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 TEST_RUN_BLAS_CT_SELECT(
-                    main_queue, oneapi::mkl::blas::row_major::gemv_batch, &transa[0], &m[0], &n[0],
+                    main_queue, oneapi::math::blas::row_major::gemv_batch, &transa[0], &m[0], &n[0],
                     &alpha[0], (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0], &incx[0],
                     &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies);
                 break;
@@ -244,21 +245,21 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(incx_ref);
-        oneapi::mkl::aligned_free(incy_ref);
-        oneapi::mkl::aligned_free(transa_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(incx_ref);
+        oneapi::math::aligned_free(incy_ref);
+        oneapi::math::aligned_free(transa_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(x_array[idx], cxt);
-                oneapi::mkl::free_shared(y_array[idx], cxt);
-                oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(x_array[idx], cxt);
+                oneapi::math::free_shared(y_array[idx], cxt);
+                oneapi::math::free_shared(y_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -273,7 +274,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     // Compare the results of reference implementation and DPC++ implementation.
     idx = 0;
     for (i = 0; i < group_count; i++) {
-        y_len = (transa[i] == oneapi::mkl::transpose::nontrans) ? m[i] : n[i];
+        y_len = (transa[i] == oneapi::math::transpose::nontrans) ? m[i] : n[i];
         for (j = 0; j < group_size[i]; j++) {
             good = good && check_equal_vector(y_array[idx], y_ref_array[idx], y_len, incy[i],
                                               std::max<int>(m[i], n[i]), std::cout);
@@ -281,20 +282,20 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         }
     }
 
-    oneapi::mkl::aligned_free(m_ref);
-    oneapi::mkl::aligned_free(n_ref);
-    oneapi::mkl::aligned_free(lda_ref);
-    oneapi::mkl::aligned_free(incx_ref);
-    oneapi::mkl::aligned_free(incy_ref);
-    oneapi::mkl::aligned_free(transa_ref);
-    oneapi::mkl::aligned_free(group_size_ref);
+    oneapi::math::aligned_free(m_ref);
+    oneapi::math::aligned_free(n_ref);
+    oneapi::math::aligned_free(lda_ref);
+    oneapi::math::aligned_free(incx_ref);
+    oneapi::math::aligned_free(incy_ref);
+    oneapi::math::aligned_free(transa_ref);
+    oneapi::math::aligned_free(group_size_ref);
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(x_array[idx], cxt);
-            oneapi::mkl::free_shared(y_array[idx], cxt);
-            oneapi::mkl::free_shared(y_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(x_array[idx], cxt);
+            oneapi::math::free_shared(y_array[idx], cxt);
+            oneapi::math::free_shared(y_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -303,7 +304,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class GemvBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemvBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -329,8 +330,8 @@ TEST_P(GemvBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GemvBatchUsmTestSuite, GemvBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp
index a6e9a6fe5..7bb36b1ce 100644
--- a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -66,14 +66,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     int64_t stride_a, stride_b, stride;
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             stride_a = lda * n;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             stride = std::max(stride_a, stride_b);
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             stride_a = lda * m;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             stride = std::max(stride_a, stride_b);
             break;
         default: break;
@@ -81,9 +81,9 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     vector<fp, allocator_helper<fp, 64>> AB(stride * batch_size), AB_ref(stride * batch_size);
 
-    rand_matrix(AB.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(AB.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride * batch_size, 1, stride * batch_size);
-    copy_matrix(AB.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(AB.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride * batch_size, 1, stride * batch_size, AB_ref.data());
 
     // Call reference IMATCOPY_BATCH_STRIDE.
@@ -120,25 +120,25 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::imatcopy_batch(
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::imatcopy_batch(
                     main_queue, trans, m, n, alpha, AB_buffer, lda, ldb, stride, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::imatcopy_batch(
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::imatcopy_batch(
                     main_queue, trans, m, n, alpha, AB_buffer, lda, ldb, stride, batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::imatcopy_batch,
-                                        trans, m, n, alpha, AB_buffer, lda, ldb, stride,
-                                        batch_size);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue,
+                                        oneapi::math::blas::column_major::imatcopy_batch, trans, m,
+                                        n, alpha, AB_buffer, lda, ldb, stride, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::imatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch,
                                         trans, m, n, alpha, AB_buffer, lda, ldb, stride,
                                         batch_size);
                 break;
@@ -152,7 +152,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -164,14 +164,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto AB_accessor = AB_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(AB_accessor, AB_ref, oneapi::mkl::layout::col_major,
+    bool good = check_equal_matrix(AB_accessor, AB_ref, oneapi::math::layout::col_major,
                                    stride * batch_size, 1, stride * batch_size, 10, std::cout);
 
     return (int)good;
 }
 
 class ImatcopyBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ImatcopyBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -197,8 +197,8 @@ TEST_P(ImatcopyBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ImatcopyBatchStrideTestSuite, ImatcopyBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp
index db40e3a1f..e0bd7ead0 100644
--- a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,7 +71,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i;
 
@@ -85,14 +85,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     int64_t stride_a, stride_b, stride;
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             stride_a = lda * n;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             stride = std::max(stride_a, stride_b);
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             stride_a = lda * m;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             stride = std::max(stride_a, stride_b);
             break;
         default: break;
@@ -103,12 +103,12 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     AB.resize(stride * batch_size);
     AB_ref.resize(stride * batch_size);
-    fp** ab_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** ab_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** ab_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** ab_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
     if ((ab_array == NULL) || (ab_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(ab_array, cxt);
-        oneapi::mkl::free_shared(ab_ref_array, cxt);
+        oneapi::math::free_shared(ab_array, cxt);
+        oneapi::math::free_shared(ab_ref_array, cxt);
         return false;
     }
 
@@ -117,9 +117,9 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         ab_ref_array[i] = &AB_ref[i * stride];
     }
 
-    rand_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride * batch_size, 1, stride * batch_size);
-    copy_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride * batch_size, 1, stride * batch_size, AB_ref);
 
     // Call reference IMATCOPY_BATCH_STRIDE.
@@ -136,28 +136,28 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::imatcopy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::imatcopy_batch(
                     main_queue, trans, m, n, alpha, &AB[0], lda, ldb, stride, batch_size,
                     dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::imatcopy_batch(main_queue, trans, m, n, alpha,
-                                                                    &AB[0], lda, ldb, stride,
-                                                                    batch_size, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::imatcopy_batch(main_queue, trans, m, n, alpha,
+                                                                     &AB[0], lda, ldb, stride,
+                                                                     batch_size, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::imatcopy_batch,
-                                        trans, m, n, alpha, &AB[0], lda, ldb, stride, batch_size,
-                                        dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(
+                    main_queue, oneapi::math::blas::column_major::imatcopy_batch, trans, m, n,
+                    alpha, &AB[0], lda, ldb, stride, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::imatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch,
                                         trans, m, n, alpha, &AB[0], lda, ldb, stride, batch_size,
                                         dependencies);
                 break;
@@ -172,9 +172,9 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::free_shared(ab_array, cxt);
-        oneapi::mkl::free_shared(ab_ref_array, cxt);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::free_shared(ab_array, cxt);
+        oneapi::math::free_shared(ab_ref_array, cxt);
         return test_skipped;
     }
 
@@ -184,17 +184,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(AB, AB_ref, oneapi::mkl::layout::col_major, stride * batch_size,
+    bool good = check_equal_matrix(AB, AB_ref, oneapi::math::layout::col_major, stride * batch_size,
                                    1, stride * batch_size, 10, std::cout);
 
-    oneapi::mkl::free_shared(ab_array, cxt);
-    oneapi::mkl::free_shared(ab_ref_array, cxt);
+    oneapi::math::free_shared(ab_array, cxt);
+    oneapi::math::free_shared(ab_ref_array, cxt);
 
     return (int)good;
 }
 
 class ImatcopyBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ImatcopyBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -220,8 +220,8 @@ TEST_P(ImatcopyBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ImatcopyBatchStrideUsmTestSuite, ImatcopyBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp
index d203f2440..5c2a38ada 100644
--- a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     auto uaint = usm_allocator<int64_t, usm::alloc::shared, 64>(cxt, *dev);
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), lda(uaint), ldb(uaint), group_size(uaint);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> trans(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> trans(uatranspose);
 
     auto uafp = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(uafp)> alpha(uafp);
@@ -112,26 +112,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
+            case oneapi::math::layout::col_major:
                 size_a = lda[i] * n[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 size_a = lda[i] * m[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
                 break;
             default: break;
         }
         size = std::max(size_a, size_b);
         for (j = 0; j < group_size[i]; j++) {
-            ab_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, cxt);
-            ab_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, cxt);
-            rand_matrix(ab_array[idx], oneapi::mkl::layout::col_major,
-                        oneapi::mkl::transpose::nontrans, size, 1, size);
-            copy_matrix(ab_array[idx], oneapi::mkl::layout::col_major,
-                        oneapi::mkl::transpose::nontrans, size, 1, size, ab_ref_array[idx]);
+            ab_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt);
+            ab_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt);
+            rand_matrix(ab_array[idx], oneapi::math::layout::col_major,
+                        oneapi::math::transpose::nontrans, size, 1, size);
+            copy_matrix(ab_array[idx], oneapi::math::layout::col_major,
+                        oneapi::math::transpose::nontrans, size, 1, size, ab_ref_array[idx]);
             idx++;
         }
     }
@@ -155,13 +155,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::imatcopy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::imatcopy_batch(
                     main_queue, trans.data(), m.data(), n.data(), alpha.data(), ab_array.data(),
                     lda.data(), ldb.data(), group_count, group_size.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::imatcopy_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::imatcopy_batch(
                     main_queue, trans.data(), m.data(), n.data(), alpha.data(), ab_array.data(),
                     lda.data(), ldb.data(), group_count, group_size.data(), dependencies);
                 break;
@@ -170,14 +170,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::imatcopy_batch,
-                                        trans.data(), m.data(), n.data(), alpha.data(),
-                                        ab_array.data(), lda.data(), ldb.data(), group_count,
-                                        group_size.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(
+                    main_queue, oneapi::math::blas::column_major::imatcopy_batch, trans.data(),
+                    m.data(), n.data(), alpha.data(), ab_array.data(), lda.data(), ldb.data(),
+                    group_count, group_size.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::imatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch,
                                         trans.data(), m.data(), n.data(), alpha.data(),
                                         ab_array.data(), lda.data(), ldb.data(), group_count,
                                         group_size.data(), dependencies);
@@ -193,12 +193,12 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(ab_array[idx], cxt);
-                oneapi::mkl::free_shared(ab_ref_array[idx], cxt);
+                oneapi::math::free_shared(ab_array[idx], cxt);
+                oneapi::math::free_shared(ab_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -215,23 +215,23 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
+            case oneapi::math::layout::col_major:
                 size_a = lda[i] * n[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 size_a = lda[i] * m[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
                 break;
             default: break;
         }
         size = std::max(size_a, size_b);
         for (j = 0; j < group_size[i]; j++) {
-            good = good &&
-                   check_equal_matrix(ab_array[idx], ab_ref_array[idx],
-                                      oneapi::mkl::layout::col_major, size, 1, size, 10, std::cout);
+            good = good && check_equal_matrix(ab_array[idx], ab_ref_array[idx],
+                                              oneapi::math::layout::col_major, size, 1, size, 10,
+                                              std::cout);
             idx++;
         }
     }
@@ -239,8 +239,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(ab_array[idx], cxt);
-            oneapi::mkl::free_shared(ab_ref_array[idx], cxt);
+            oneapi::math::free_shared(ab_array[idx], cxt);
+            oneapi::math::free_shared(ab_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -249,7 +249,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class ImatcopyBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ImatcopyBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -275,8 +275,8 @@ TEST_P(ImatcopyBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ImatcopyBatchUsmTestSuite, ImatcopyBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp
index f036d0bbb..2da2de4ab 100644
--- a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -70,14 +70,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     int64_t stride_a, stride_b, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * n : lda * m;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+        case oneapi::math::layout::col_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * n : lda * m;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * n;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+        case oneapi::math::layout::row_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * n;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             stride_c = ldc * m;
             break;
         default: break;
@@ -86,13 +86,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     vector<fp, allocator_helper<fp, 64>> A(stride_a * batch_size), B(stride_b * batch_size),
         C(stride_c * batch_size), C_ref(stride_c * batch_size);
 
-    rand_matrix(A.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(A.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size);
-    rand_matrix(B.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size);
-    rand_matrix(C.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size);
-    copy_matrix(C.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size, C_ref.data());
 
     // Call reference OMATADD_BATCH_STRIDE.
@@ -132,13 +132,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::omatadd_batch(
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::omatadd_batch(
                     main_queue, transa, transb, m, n, alpha, A_buffer, lda, stride_a, beta,
                     B_buffer, ldb, stride_b, C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::omatadd_batch(
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::omatadd_batch(
                     main_queue, transa, transb, m, n, alpha, A_buffer, lda, stride_a, beta,
                     B_buffer, ldb, stride_b, C_buffer, ldc, stride_c, batch_size);
                 break;
@@ -146,14 +146,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatadd_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatadd_batch,
                                         transa, transb, m, n, alpha, A_buffer, lda, stride_a, beta,
                                         B_buffer, ldb, stride_b, C_buffer, ldc, stride_c,
                                         batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatadd_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatadd_batch,
                                         transa, transb, m, n, alpha, A_buffer, lda, stride_a, beta,
                                         B_buffer, ldb, stride_b, C_buffer, ldc, stride_c,
                                         batch_size);
@@ -168,7 +168,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -180,14 +180,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto C_accessor = C_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(C_accessor, C_ref, oneapi::mkl::layout::col_major,
+    bool good = check_equal_matrix(C_accessor, C_ref, oneapi::math::layout::col_major,
                                    stride_c * batch_size, 1, stride_c * batch_size, 10, std::cout);
 
     return (int)good;
 }
 
 class OmataddBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmataddBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -213,8 +213,8 @@ TEST_P(OmataddBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmataddBatchStrideTestSuite, OmataddBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp
index 59cd4ced3..2f0deb8b4 100644
--- a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,7 +71,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -89,14 +89,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     int64_t stride_a, stride_b, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * n : lda * m;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+        case oneapi::math::layout::col_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * n : lda * m;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * n;
-            stride_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+        case oneapi::math::layout::row_major:
+            stride_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * n;
+            stride_b = (transb == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             stride_c = ldc * m;
             break;
         default: break;
@@ -110,17 +110,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     C.resize(stride_c * batch_size);
     C_ref.resize(stride_c * batch_size);
 
-    fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** b_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** c_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** c_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** b_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** c_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** c_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
 
     if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return false;
     }
 
@@ -131,13 +131,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         c_ref_array[i] = &C_ref[i * stride_c];
     }
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size);
-    rand_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size);
-    copy_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size, C_ref);
 
     // Call reference OMATADD_BATCH_STRIDE.
@@ -156,13 +156,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatadd_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatadd_batch(
                     main_queue, transa, transb, m, n, alpha, &A[0], lda, stride_a, beta, &B[0], ldb,
                     stride_b, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatadd_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatadd_batch(
                     main_queue, transa, transb, m, n, alpha, &A[0], lda, stride_a, beta, &B[0], ldb,
                     stride_b, &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
@@ -171,14 +171,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatadd_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatadd_batch,
                                         transa, transb, m, n, alpha, &A[0], lda, stride_a, beta,
                                         &B[0], ldb, stride_b, &C[0], ldc, stride_c, batch_size,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatadd_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatadd_batch,
                                         transa, transb, m, n, alpha, &A[0], lda, stride_a, beta,
                                         &B[0], ldb, stride_b, &C[0], ldc, stride_c, batch_size,
                                         dependencies);
@@ -194,11 +194,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return test_skipped;
     }
 
@@ -208,19 +208,19 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(C, C_ref, oneapi::mkl::layout::col_major, stride_c * batch_size,
+    bool good = check_equal_matrix(C, C_ref, oneapi::math::layout::col_major, stride_c * batch_size,
                                    1, stride_c * batch_size, 10, std::cout);
 
-    oneapi::mkl::free_shared(a_array, cxt);
-    oneapi::mkl::free_shared(b_array, cxt);
-    oneapi::mkl::free_shared(c_array, cxt);
-    oneapi::mkl::free_shared(c_ref_array, cxt);
+    oneapi::math::free_shared(a_array, cxt);
+    oneapi::math::free_shared(b_array, cxt);
+    oneapi::math::free_shared(c_array, cxt);
+    oneapi::math::free_shared(c_ref_array, cxt);
 
     return (int)good;
 }
 
 class OmataddBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmataddBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -246,8 +246,8 @@ TEST_P(OmataddBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmataddBatchStrideUsmTestSuite, OmataddBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp
index 16b407890..9ef8cbdb6 100644
--- a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -67,13 +67,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     int64_t stride_a, stride_b;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             stride_a = lda * n;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             stride_a = lda * m;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -82,7 +82,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         B_ref(stride_b * batch_size);
 
     for (i = 0; i < batch_size; i++) {
-        rand_matrix(A.data() + stride_a * i, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+        rand_matrix(A.data() + stride_a * i, layout, oneapi::math::transpose::nontrans, m, n, lda);
         rand_matrix(B.data() + stride_b * i, layout, trans, m, n, ldb);
     }
 
@@ -121,27 +121,27 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::omatcopy_batch(main_queue, trans, m, n, alpha,
-                                                                A_buffer, lda, stride_a, B_buffer,
-                                                                ldb, stride_b, batch_size);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::omatcopy_batch(main_queue, trans, m, n, alpha,
+                                                                 A_buffer, lda, stride_a, B_buffer,
+                                                                 ldb, stride_b, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::omatcopy_batch(main_queue, trans, m, n, alpha,
-                                                             A_buffer, lda, stride_a, B_buffer, ldb,
-                                                             stride_b, batch_size);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::omatcopy_batch(main_queue, trans, m, n, alpha,
+                                                              A_buffer, lda, stride_a, B_buffer,
+                                                              ldb, stride_b, batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy_batch,
-                                        trans, m, n, alpha, A_buffer, lda, stride_a, B_buffer, ldb,
-                                        stride_b, batch_size);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(
+                    main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans, m, n,
+                    alpha, A_buffer, lda, stride_a, B_buffer, ldb, stride_b, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch,
                                         trans, m, n, alpha, A_buffer, lda, stride_a, B_buffer, ldb,
                                         stride_b, batch_size);
                 break;
@@ -155,7 +155,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -167,14 +167,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto B_accessor = B_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::mkl::layout::col_major,
+    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::math::layout::col_major,
                                    stride_b * batch_size, 1, stride_b * batch_size, 10, std::cout);
 
     return (int)good;
 }
 
 class OmatcopyBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmatcopyBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -200,8 +200,8 @@ TEST_P(OmatcopyBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmatcopyBatchStrideTestSuite, OmatcopyBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp
index 9533a3030..321c9b3bb 100644
--- a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -87,13 +87,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     int64_t stride_a, stride_b;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             stride_a = lda * n;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             stride_a = lda * m;
-            stride_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            stride_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -105,15 +105,15 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     B.resize(stride_b * batch_size);
     B_ref.resize(stride_b * batch_size);
 
-    fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** b_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** b_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** b_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** b_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
 
     if ((a_array == NULL) || (b_array == NULL) || (b_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(b_ref_array, cxt);
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(b_ref_array, cxt);
         return false;
     }
 
@@ -123,11 +123,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         b_ref_array[i] = &B_ref[i * stride_b];
     }
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size);
-    copy_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_b * batch_size, 1, stride_b * batch_size, B_ref);
 
     // Call reference OMATCOPY_BATCH_STRIDE.
@@ -145,13 +145,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatcopy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatcopy_batch(
                     main_queue, trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb, stride_b,
                     batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatcopy_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatcopy_batch(
                     main_queue, trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb, stride_b,
                     batch_size, dependencies);
                 break;
@@ -160,13 +160,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy_batch,
-                                        trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb,
-                                        stride_b, batch_size, dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(
+                    main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans, m, n,
+                    alpha, &A[0], lda, stride_a, &B[0], ldb, stride_b, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch,
                                         trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb,
                                         stride_b, batch_size, dependencies);
                 break;
@@ -181,10 +181,10 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(b_array, cxt);
-        oneapi::mkl::free_shared(b_ref_array, cxt);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(b_array, cxt);
+        oneapi::math::free_shared(b_ref_array, cxt);
         return test_skipped;
     }
 
@@ -194,18 +194,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(B, B_ref, oneapi::mkl::layout::col_major, stride_b * batch_size,
+    bool good = check_equal_matrix(B, B_ref, oneapi::math::layout::col_major, stride_b * batch_size,
                                    1, stride_b * batch_size, 10, std::cout);
 
-    oneapi::mkl::free_shared(a_array, cxt);
-    oneapi::mkl::free_shared(b_array, cxt);
-    oneapi::mkl::free_shared(b_ref_array, cxt);
+    oneapi::math::free_shared(a_array, cxt);
+    oneapi::math::free_shared(b_array, cxt);
+    oneapi::math::free_shared(b_ref_array, cxt);
 
     return (int)good;
 }
 
 class OmatcopyBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmatcopyBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -231,8 +231,8 @@ TEST_P(OmatcopyBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmatcopyBatchStrideUsmTestSuite, OmatcopyBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp
index e0eb3feaa..cc8be6fe8 100644
--- a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,8 +73,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     auto uaint = usm_allocator<int64_t, usm::alloc::shared, 64>(cxt, *dev);
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), lda(uaint), ldb(uaint), group_size(uaint);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> trans(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> trans(uatranspose);
 
     auto uafp = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(uafp)> alpha(uafp);
@@ -113,28 +113,28 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
+            case oneapi::math::layout::col_major:
                 size_a = lda[i] * n[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 size_a = lda[i] * m[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
                 break;
             default: break;
         }
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
-            b_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
-            b_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
-            rand_matrix(a_array[idx], oneapi::mkl::layout::col_major,
-                        oneapi::mkl::transpose::nontrans, size_a, 1, size_a);
-            rand_matrix(b_array[idx], oneapi::mkl::layout::col_major,
-                        oneapi::mkl::transpose::nontrans, size_b, 1, size_b);
-            copy_matrix(b_array[idx], oneapi::mkl::layout::col_major,
-                        oneapi::mkl::transpose::nontrans, size_b, 1, size_b, b_ref_array[idx]);
+            a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
+            b_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
+            b_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
+            rand_matrix(a_array[idx], oneapi::math::layout::col_major,
+                        oneapi::math::transpose::nontrans, size_a, 1, size_a);
+            rand_matrix(b_array[idx], oneapi::math::layout::col_major,
+                        oneapi::math::transpose::nontrans, size_b, 1, size_b);
+            copy_matrix(b_array[idx], oneapi::math::layout::col_major,
+                        oneapi::math::transpose::nontrans, size_b, 1, size_b, b_ref_array[idx]);
             idx++;
         }
     }
@@ -158,14 +158,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatcopy_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatcopy_batch(
                     main_queue, trans.data(), m.data(), n.data(), alpha.data(),
                     (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count,
                     group_size.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatcopy_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatcopy_batch(
                     main_queue, trans.data(), m.data(), n.data(), alpha.data(),
                     (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count,
                     group_size.data(), dependencies);
@@ -175,14 +175,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy_batch,
-                                        trans.data(), m.data(), n.data(), alpha.data(),
-                                        (const fp**)a_array.data(), lda.data(), b_array.data(),
-                                        ldb.data(), group_count, group_size.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(
+                    main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans.data(),
+                    m.data(), n.data(), alpha.data(), (const fp**)a_array.data(), lda.data(),
+                    b_array.data(), ldb.data(), group_count, group_size.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch,
                                         trans.data(), m.data(), n.data(), alpha.data(),
                                         (const fp**)a_array.data(), lda.data(), b_array.data(),
                                         ldb.data(), group_count, group_size.data(), dependencies);
@@ -198,13 +198,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(b_array[idx], cxt);
-                oneapi::mkl::free_shared(b_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(b_array[idx], cxt);
+                oneapi::math::free_shared(b_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -221,22 +221,22 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
+            case oneapi::math::layout::col_major:
                 size_a = lda[i] * n[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * n[i] : ldb[i] * m[i];
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 size_a = lda[i] * m[i];
                 size_b =
-                    (trans[i] == oneapi::mkl::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
+                    (trans[i] == oneapi::math::transpose::nontrans) ? ldb[i] * m[i] : ldb[i] * n[i];
                 break;
             default: break;
         }
         for (j = 0; j < group_size[i]; j++) {
             good = good && check_equal_matrix(b_array[idx], b_ref_array[idx],
-                                              oneapi::mkl::layout::col_major, size_b, 1, size_b, 10,
-                                              std::cout);
+                                              oneapi::math::layout::col_major, size_b, 1, size_b,
+                                              10, std::cout);
             idx++;
         }
     }
@@ -244,9 +244,9 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(b_array[idx], cxt);
-            oneapi::mkl::free_shared(b_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(b_array[idx], cxt);
+            oneapi::math::free_shared(b_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -255,7 +255,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class OmatcopyBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmatcopyBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -281,8 +281,8 @@ TEST_P(OmatcopyBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmatcopyBatchUsmTestSuite, OmatcopyBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp
index aeb33c42e..8470a9139 100644
--- a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,12 +48,12 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t n, k;
     int64_t lda, ldc;
-    oneapi::mkl::uplo upper_lower;
-    oneapi::mkl::transpose trans;
+    oneapi::math::uplo upper_lower;
+    oneapi::math::transpose trans;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -65,26 +65,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     alpha = rand_scalar<fp>();
     beta = rand_scalar<fp>();
 
-    upper_lower = (oneapi::mkl::uplo)(std::rand() % 2);
+    upper_lower = (oneapi::math::uplo)(std::rand() % 2);
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (std::rand() % 2) == 0   ? oneapi::mkl::transpose::nontrans
-                : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans
-                                         : oneapi::mkl::transpose::conjtrans;
+        trans = (std::rand() % 2) == 0   ? oneapi::math::transpose::nontrans
+                : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans
+                                         : oneapi::math::transpose::conjtrans;
     }
     else {
-        trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans
-                                       : oneapi::mkl::transpose::trans;
+        trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans
+                                       : oneapi::math::transpose::trans;
     }
 
     int64_t stride_a, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (trans == oneapi::mkl::transpose::nontrans) ? lda * k : lda * n;
+        case oneapi::math::layout::col_major:
+            stride_a = (trans == oneapi::math::transpose::nontrans) ? lda * k : lda * n;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (trans == oneapi::mkl::transpose::nontrans) ? lda * n : lda * k;
+        case oneapi::math::layout::row_major:
+            stride_a = (trans == oneapi::math::transpose::nontrans) ? lda * n : lda * k;
             stride_c = ldc * n;
             break;
         default: break;
@@ -95,7 +95,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     for (i = 0; i < batch_size; i++) {
         rand_matrix(A.data() + stride_a * i, layout, trans, n, k, lda);
-        rand_matrix(C.data() + stride_c * i, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+        rand_matrix(C.data() + stride_c * i, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     }
 
     C_ref = C;
@@ -140,27 +140,27 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::syrk_batch(main_queue, upper_lower, trans, n, k,
-                                                            alpha, A_buffer, lda, stride_a, beta,
-                                                            C_buffer, ldc, stride_c, batch_size);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::syrk_batch(main_queue, upper_lower, trans, n, k,
+                                                             alpha, A_buffer, lda, stride_a, beta,
+                                                             C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::syrk_batch(main_queue, upper_lower, trans, n, k,
-                                                         alpha, A_buffer, lda, stride_a, beta,
-                                                         C_buffer, ldc, stride_c, batch_size);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::syrk_batch(main_queue, upper_lower, trans, n, k,
+                                                          alpha, A_buffer, lda, stride_a, beta,
+                                                          C_buffer, ldc, stride_c, batch_size);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk_batch,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, stride_a,
                                         beta, C_buffer, ldc, stride_c, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk_batch,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, stride_a,
                                         beta, C_buffer, ldc, stride_c, batch_size);
                 break;
@@ -174,7 +174,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -187,14 +187,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
 
     auto C_accessor = C_buffer.get_host_access(read_only);
     bool good =
-        check_equal_matrix(C_accessor, C_ref, oneapi::mkl::layout::col_major, stride_c * batch_size,
-                           1, stride_c * batch_size, 10 * k, std::cout);
+        check_equal_matrix(C_accessor, C_ref, oneapi::math::layout::col_major,
+                           stride_c * batch_size, 1, stride_c * batch_size, 10 * k, std::cout);
 
     return (int)good;
 }
 
 class SyrkBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SyrkBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -220,8 +220,8 @@ TEST_P(SyrkBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SyrkBatchStrideTestSuite, SyrkBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp
index b1f66fa07..4736ae1a2 100644
--- a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
+int test(device* dev, oneapi::math::layout layout, int64_t batch_size) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,8 +71,8 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     // Prepare data.
     int64_t n, k;
     int64_t lda, ldc;
-    oneapi::mkl::uplo upper_lower;
-    oneapi::mkl::transpose trans;
+    oneapi::math::uplo upper_lower;
+    oneapi::math::transpose trans;
     fp alpha, beta;
 
     int64_t i, tmp;
@@ -84,26 +84,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     ldc = std::max(n, n);
     alpha = rand_scalar<fp>();
     beta = rand_scalar<fp>();
-    upper_lower = (oneapi::mkl::uplo)(std::rand() % 2);
+    upper_lower = (oneapi::math::uplo)(std::rand() % 2);
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (std::rand() % 2) == 0   ? oneapi::mkl::transpose::nontrans
-                : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans
-                                         : oneapi::mkl::transpose::conjtrans;
+        trans = (std::rand() % 2) == 0   ? oneapi::math::transpose::nontrans
+                : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans
+                                         : oneapi::math::transpose::conjtrans;
     }
     else {
-        trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans
-                                       : oneapi::mkl::transpose::trans;
+        trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans
+                                       : oneapi::math::transpose::trans;
     }
 
     int64_t stride_a, stride_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            stride_a = (trans == oneapi::mkl::transpose::nontrans) ? lda * k : lda * n;
+        case oneapi::math::layout::col_major:
+            stride_a = (trans == oneapi::math::transpose::nontrans) ? lda * k : lda * n;
             stride_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            stride_a = (trans == oneapi::mkl::transpose::nontrans) ? lda * n : lda * k;
+        case oneapi::math::layout::row_major:
+            stride_a = (trans == oneapi::math::transpose::nontrans) ? lda * n : lda * k;
             stride_c = ldc * n;
             break;
         default: break;
@@ -116,15 +116,15 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     C.resize(stride_c * batch_size);
     C_ref.resize(stride_c * batch_size);
 
-    fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** c_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
-    fp** c_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** c_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
+    fp** c_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt);
 
     if ((a_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) {
         std::cout << "Error cannot allocate arrays of pointers\n";
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return false;
     }
 
@@ -134,11 +134,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         c_ref_array[i] = &C_ref[i * stride_c];
     }
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_a * batch_size, 1, stride_a * batch_size);
-    rand_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    rand_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size);
-    copy_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans,
+    copy_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
                 stride_c * batch_size, 1, stride_c * batch_size, C_ref);
 
     // Call reference SYRK_BATCH_STRIDE.
@@ -161,13 +161,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syrk_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syrk_batch(
                     main_queue, upper_lower, trans, n, k, alpha, &A[0], lda, stride_a, beta, &C[0],
                     ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syrk_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syrk_batch(
                     main_queue, upper_lower, trans, n, k, alpha, &A[0], lda, stride_a, beta, &C[0],
                     ldc, stride_c, batch_size, dependencies);
                 break;
@@ -176,13 +176,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk_batch,
                                         upper_lower, trans, n, k, alpha, &A[0], lda, stride_a, beta,
                                         &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk_batch,
                                         upper_lower, trans, n, k, alpha, &A[0], lda, stride_a, beta,
                                         &C[0], ldc, stride_c, batch_size, dependencies);
                 break;
@@ -197,10 +197,10 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::free_shared(a_array, cxt);
-        oneapi::mkl::free_shared(c_array, cxt);
-        oneapi::mkl::free_shared(c_ref_array, cxt);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::free_shared(a_array, cxt);
+        oneapi::math::free_shared(c_array, cxt);
+        oneapi::math::free_shared(c_ref_array, cxt);
         return test_skipped;
     }
 
@@ -210,18 +210,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(C, C_ref, oneapi::mkl::layout::col_major, stride_c * batch_size,
+    bool good = check_equal_matrix(C, C_ref, oneapi::math::layout::col_major, stride_c * batch_size,
                                    1, stride_c * batch_size, 10 * k, std::cout);
 
-    oneapi::mkl::free_shared(a_array, cxt);
-    oneapi::mkl::free_shared(c_array, cxt);
-    oneapi::mkl::free_shared(c_ref_array, cxt);
+    oneapi::math::free_shared(a_array, cxt);
+    oneapi::math::free_shared(c_array, cxt);
+    oneapi::math::free_shared(c_ref_array, cxt);
 
     return (int)good;
 }
 
 class SyrkBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SyrkBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -247,8 +247,8 @@ TEST_P(SyrkBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SyrkBatchStrideUsmTestSuite, SyrkBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp
index b331b4c66..ef117619b 100644
--- a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp
@@ -30,10 +30,10 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include "allocator_helper.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,11 +72,11 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     auto uaint = usm_allocator<int64_t, usm::alloc::shared, 64>(cxt, *dev);
     vector<int64_t, decltype(uaint)> n(uaint), k(uaint), lda(uaint), ldc(uaint), group_size(uaint);
 
-    auto uauplo = usm_allocator<oneapi::mkl::uplo, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::uplo, decltype(uauplo)> upper_lower(uauplo);
+    auto uauplo = usm_allocator<oneapi::math::uplo, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::uplo, decltype(uauplo)> upper_lower(uauplo);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> trans(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> trans(uatranspose);
 
     auto uafp = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(uafp)> alpha(uafp), beta(uafp);
@@ -104,15 +104,15 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         ldc[i] = std::max(n[i], n[i]);
         alpha[i] = rand_scalar<fp>();
         beta[i] = rand_scalar<fp>();
-        upper_lower[i] = (oneapi::mkl::uplo)(std::rand() % 2);
+        upper_lower[i] = (oneapi::math::uplo)(std::rand() % 2);
         if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-            trans[i] = (std::rand() % 2) == 0   ? oneapi::mkl::transpose::nontrans
-                       : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans
-                                                : oneapi::mkl::transpose::conjtrans;
+            trans[i] = (std::rand() % 2) == 0   ? oneapi::math::transpose::nontrans
+                       : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans
+                                                : oneapi::math::transpose::conjtrans;
         }
         else {
-            trans[i] = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans
-                                              : oneapi::mkl::transpose::trans;
+            trans[i] = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans
+                                              : oneapi::math::transpose::trans;
         }
         total_batch_count += group_size[i];
     }
@@ -126,23 +126,24 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     idx = 0;
     for (i = 0; i < group_count; i++) {
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                size_a = lda[i] * ((trans[i] == oneapi::mkl::transpose::nontrans) ? k[i] : n[i]);
+            case oneapi::math::layout::col_major:
+                size_a = lda[i] * ((trans[i] == oneapi::math::transpose::nontrans) ? k[i] : n[i]);
                 size_c = ldc[i] * n[i];
                 break;
-            case oneapi::mkl::layout::row_major:
-                size_a = lda[i] * ((trans[i] == oneapi::mkl::transpose::nontrans) ? n[i] : k[i]);
+            case oneapi::math::layout::row_major:
+                size_a = lda[i] * ((trans[i] == oneapi::math::transpose::nontrans) ? n[i] : k[i]);
                 size_c = ldc[i] * n[i];
                 break;
             default: break;
         }
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
-            c_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
-            c_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
+            a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
+            c_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
+            c_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt);
             rand_matrix(a_array[idx], layout, trans[i], n[i], k[i], lda[i]);
-            rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, n[i], n[i], ldc[i]);
-            copy_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, n[i], n[i], ldc[i],
+            rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, n[i], n[i],
+                        ldc[i]);
+            copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, n[i], n[i], ldc[i],
                         c_ref_array[idx]);
             idx++;
         }
@@ -150,33 +151,33 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     // Call reference SYRK_BATCH.
     using fp_ref = typename ref_type_info<fp>::type;
-    int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* k_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
+    int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* k_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
 
     CBLAS_UPLO* upper_lower_ref =
-        (CBLAS_UPLO*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count);
+        (CBLAS_UPLO*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count);
     CBLAS_TRANSPOSE* trans_ref =
-        (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
+        (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
 
     if ((n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) || (ldc_ref == NULL) ||
         (trans_ref == NULL) || (upper_lower_ref == NULL) || (group_size_ref == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(k_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(trans_ref);
-        oneapi::mkl::aligned_free(upper_lower_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(k_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(trans_ref);
+        oneapi::math::aligned_free(upper_lower_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -205,14 +206,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syrk_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syrk_batch(
                     main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0],
                     (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count,
                     &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syrk_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syrk_batch(
                     main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0],
                     (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count,
                     &group_size[0], dependencies);
@@ -222,14 +223,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk_batch,
                                         &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0],
                                         (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0],
                                         &ldc[0], group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk_batch,
                                         &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0],
                                         (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0],
                                         &ldc[0], group_count, &group_size[0], dependencies);
@@ -245,20 +246,20 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(k_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldc_ref);
-        oneapi::mkl::aligned_free(upper_lower_ref);
-        oneapi::mkl::aligned_free(trans_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(k_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldc_ref);
+        oneapi::math::aligned_free(upper_lower_ref);
+        oneapi::math::aligned_free(trans_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(c_array[idx], cxt);
-                oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(c_array[idx], cxt);
+                oneapi::math::free_shared(c_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -279,19 +280,19 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             idx++;
         }
     }
-    oneapi::mkl::aligned_free(n_ref);
-    oneapi::mkl::aligned_free(k_ref);
-    oneapi::mkl::aligned_free(lda_ref);
-    oneapi::mkl::aligned_free(ldc_ref);
-    oneapi::mkl::aligned_free(upper_lower_ref);
-    oneapi::mkl::aligned_free(trans_ref);
-    oneapi::mkl::aligned_free(group_size_ref);
+    oneapi::math::aligned_free(n_ref);
+    oneapi::math::aligned_free(k_ref);
+    oneapi::math::aligned_free(lda_ref);
+    oneapi::math::aligned_free(ldc_ref);
+    oneapi::math::aligned_free(upper_lower_ref);
+    oneapi::math::aligned_free(trans_ref);
+    oneapi::math::aligned_free(group_size_ref);
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(c_array[idx], cxt);
-            oneapi::mkl::free_shared(c_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(c_array[idx], cxt);
+            oneapi::math::free_shared(c_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -300,7 +301,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class SyrkBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SyrkBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -326,8 +327,8 @@ TEST_P(SyrkBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SyrkBatchUsmTestSuite, SyrkBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp
index c85e7a885..37fd56886 100644
--- a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp
+++ b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,14 +48,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
-    oneapi::mkl::side left_right;
-    oneapi::mkl::uplo upper_lower;
-    oneapi::mkl::diag unit_nonunit;
+    oneapi::math::transpose trans;
+    oneapi::math::side left_right;
+    oneapi::math::uplo upper_lower;
+    oneapi::math::diag unit_nonunit;
     fp alpha;
     int64_t batch_size;
     int64_t i, tmp;
@@ -68,26 +68,26 @@ int test(device* dev, oneapi::mkl::layout layout) {
     alpha = rand_scalar<fp>();
 
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (oneapi::mkl::transpose)(std::rand() % 2);
+        trans = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            trans = oneapi::mkl::transpose::conjtrans;
+            trans = oneapi::math::transpose::conjtrans;
         else
-            trans = (oneapi::mkl::transpose)tmp;
+            trans = (oneapi::math::transpose)tmp;
     }
-    left_right = (oneapi::mkl::side)(std::rand() % 2);
-    upper_lower = (oneapi::mkl::uplo)(std::rand() % 2);
-    unit_nonunit = (oneapi::mkl::diag)(std::rand() % 2);
+    left_right = (oneapi::math::side)(std::rand() % 2);
+    upper_lower = (oneapi::math::uplo)(std::rand() % 2);
+    unit_nonunit = (oneapi::math::diag)(std::rand() % 2);
 
     int64_t stride_a, stride_b;
     int64_t total_size_b;
 
-    stride_a = (left_right == oneapi::mkl::side::left) ? lda * m : lda * n;
+    stride_a = (left_right == oneapi::math::side::left) ? lda * m : lda * n;
     switch (layout) {
-        case oneapi::mkl::layout::col_major: stride_b = ldb * n; break;
-        case oneapi::mkl::layout::row_major: stride_b = ldb * m; break;
+        case oneapi::math::layout::col_major: stride_b = ldb * n; break;
+        case oneapi::math::layout::row_major: stride_b = ldb * m; break;
         default: break;
     }
     total_size_b = batch_size * stride_b;
@@ -96,11 +96,11 @@ int test(device* dev, oneapi::mkl::layout layout) {
         B_ref(total_size_b);
 
     for (i = 0; i < batch_size; i++) {
-        if (left_right == oneapi::mkl::side::left)
+        if (left_right == oneapi::math::side::left)
             rand_trsm_matrix(A.data() + stride_a * i, layout, trans, m, m, lda);
         else
             rand_trsm_matrix(A.data() + stride_a * i, layout, trans, n, n, lda);
-        rand_matrix(B.data() + stride_b * i, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+        rand_matrix(B.data() + stride_b * i, layout, oneapi::math::transpose::nontrans, m, n, ldb);
     }
 
     B_ref = B;
@@ -145,13 +145,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::trsm_batch(
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::trsm_batch(
                     main_queue, left_right, upper_lower, trans, unit_nonunit, m, n, alpha, A_buffer,
                     lda, stride_a, B_buffer, ldb, stride_b, batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::trsm_batch(
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::trsm_batch(
                     main_queue, left_right, upper_lower, trans, unit_nonunit, m, n, alpha, A_buffer,
                     lda, stride_a, B_buffer, ldb, stride_b, batch_size);
                 break;
@@ -159,14 +159,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm_batch,
                                         left_right, upper_lower, trans, unit_nonunit, m, n, alpha,
                                         A_buffer, lda, stride_a, B_buffer, ldb, stride_b,
                                         batch_size);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm_batch,
                                         left_right, upper_lower, trans, unit_nonunit, m, n, alpha,
                                         A_buffer, lda, stride_a, B_buffer, ldb, stride_b,
                                         batch_size);
@@ -181,7 +181,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -193,14 +193,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Compare the results of reference implementation and DPC++ implementation.
     auto B_accessor = B_buffer.get_host_access(read_only);
     bool good =
-        check_equal_trsm_matrix(B_accessor, B_ref, oneapi::mkl::layout::col_major, total_size_b, 1,
+        check_equal_trsm_matrix(B_accessor, B_ref, oneapi::math::layout::col_major, total_size_b, 1,
                                 total_size_b, 10 * std::max(m, n), std::cout);
 
     return (int)good;
 }
 
 class TrsmBatchStrideTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrsmBatchStrideTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -224,8 +224,8 @@ TEST_P(TrsmBatchStrideTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(TrsmBatchStrideTestSuite, TrsmBatchStrideTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp
index 1b518d5bb..0bc331cd5 100644
--- a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp
+++ b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,10 +71,10 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
-    oneapi::mkl::side left_right;
-    oneapi::mkl::uplo upper_lower;
-    oneapi::mkl::diag unit_nonunit;
+    oneapi::math::transpose trans;
+    oneapi::math::side left_right;
+    oneapi::math::uplo upper_lower;
+    oneapi::math::diag unit_nonunit;
     fp alpha;
     int64_t batch_size;
     int64_t i, tmp;
@@ -87,24 +87,24 @@ int test(device* dev, oneapi::mkl::layout layout) {
     alpha = rand_scalar<fp>();
 
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (oneapi::mkl::transpose)(std::rand() % 2);
+        trans = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            trans = oneapi::mkl::transpose::conjtrans;
+            trans = oneapi::math::transpose::conjtrans;
         else
-            trans = (oneapi::mkl::transpose)tmp;
+            trans = (oneapi::math::transpose)tmp;
     }
-    left_right = (oneapi::mkl::side)(std::rand() % 2);
-    upper_lower = (oneapi::mkl::uplo)(std::rand() % 2);
-    unit_nonunit = (oneapi::mkl::diag)(std::rand() % 2);
+    left_right = (oneapi::math::side)(std::rand() % 2);
+    upper_lower = (oneapi::math::uplo)(std::rand() % 2);
+    unit_nonunit = (oneapi::math::diag)(std::rand() % 2);
 
     int64_t stride_a, stride_b;
     int64_t total_size_b;
 
-    stride_a = (left_right == oneapi::mkl::side::left) ? lda * m : lda * n;
-    stride_b = (layout == oneapi::mkl::layout::col_major) ? ldb * n : ldb * m;
+    stride_a = (left_right == oneapi::math::side::left) ? lda * m : lda * n;
+    stride_b = (layout == oneapi::math::layout::col_major) ? ldb * n : ldb * m;
 
     total_size_b = batch_size * stride_b;
 
@@ -116,14 +116,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     B_ref.resize(total_size_b);
 
     for (i = 0; i < batch_size; i++) {
-        if (left_right == oneapi::mkl::side::left)
+        if (left_right == oneapi::math::side::left)
             rand_trsm_matrix(&A[stride_a * i], layout, trans, m, m, lda);
         else
             rand_trsm_matrix(&A[stride_a * i], layout, trans, n, n, lda);
-        rand_matrix(&B[stride_b * i], layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+        rand_matrix(&B[stride_b * i], layout, oneapi::math::transpose::nontrans, m, n, ldb);
     }
 
-    copy_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, total_size_b,
+    copy_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, total_size_b,
                 1, total_size_b, B_ref);
 
     // Call reference TRSM_BATCH_STRIDE.
@@ -147,13 +147,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trsm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trsm_batch(
                     main_queue, left_right, upper_lower, trans, unit_nonunit, m, n, alpha, &A[0],
                     lda, stride_a, &B[0], ldb, stride_b, batch_size, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trsm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trsm_batch(
                     main_queue, left_right, upper_lower, trans, unit_nonunit, m, n, alpha, &A[0],
                     lda, stride_a, &B[0], ldb, stride_b, batch_size, dependencies);
                 break;
@@ -162,14 +162,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm_batch,
                                         left_right, upper_lower, trans, unit_nonunit, m, n, alpha,
                                         &A[0], lda, stride_a, &B[0], ldb, stride_b, batch_size,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm_batch,
                                         left_right, upper_lower, trans, unit_nonunit, m, n, alpha,
                                         &A[0], lda, stride_a, &B[0], ldb, stride_b, batch_size,
                                         dependencies);
@@ -185,7 +185,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -195,14 +195,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_trsm_matrix(B, B_ref, oneapi::mkl::layout::col_major, total_size_b, 1,
+    bool good = check_equal_trsm_matrix(B, B_ref, oneapi::math::layout::col_major, total_size_b, 1,
                                         total_size_b, 10 * std::max(m, n), std::cout);
 
     return (int)good;
 }
 
 class TrsmBatchStrideUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrsmBatchStrideUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -226,8 +226,8 @@ TEST_P(TrsmBatchStrideUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(TrsmBatchStrideUsmTestSuite, TrsmBatchStrideUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp
index b7ddff8c8..cb4f06bbc 100644
--- a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp
+++ b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp
@@ -30,10 +30,10 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include "allocator_helper.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
+int test(device* dev, oneapi::math::layout layout, int64_t group_count) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,17 +72,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     auto uaint = usm_allocator<int64_t, usm::alloc::shared, 64>(cxt, *dev);
     vector<int64_t, decltype(uaint)> m(uaint), n(uaint), lda(uaint), ldb(uaint), group_size(uaint);
 
-    auto uatranspose = usm_allocator<oneapi::mkl::transpose, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::transpose, decltype(uatranspose)> trans(uatranspose);
+    auto uatranspose = usm_allocator<oneapi::math::transpose, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::transpose, decltype(uatranspose)> trans(uatranspose);
 
-    auto uaside = usm_allocator<oneapi::mkl::side, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::side, decltype(uaside)> left_right(uaside);
+    auto uaside = usm_allocator<oneapi::math::side, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::side, decltype(uaside)> left_right(uaside);
 
-    auto uauplo = usm_allocator<oneapi::mkl::uplo, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::uplo, decltype(uauplo)> upper_lower(uauplo);
+    auto uauplo = usm_allocator<oneapi::math::uplo, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::uplo, decltype(uauplo)> upper_lower(uauplo);
 
-    auto uadiag = usm_allocator<oneapi::mkl::diag, usm::alloc::shared, 64>(cxt, *dev);
-    vector<oneapi::mkl::diag, decltype(uadiag)> unit_nonunit(uadiag);
+    auto uadiag = usm_allocator<oneapi::math::diag, usm::alloc::shared, 64>(cxt, *dev);
+    vector<oneapi::math::diag, decltype(uadiag)> unit_nonunit(uadiag);
 
     auto uafp = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(uafp)> alpha(uafp);
@@ -112,18 +112,18 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         ldb[i] = std::max(n[i], m[i]);
         alpha[i] = rand_scalar<fp>();
         if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-            trans[i] = (oneapi::mkl::transpose)(std::rand() % 2);
+            trans[i] = (oneapi::math::transpose)(std::rand() % 2);
         }
         else {
             tmp = std::rand() % 3;
             if (tmp == 2)
-                trans[i] = oneapi::mkl::transpose::conjtrans;
+                trans[i] = oneapi::math::transpose::conjtrans;
             else
-                trans[i] = (oneapi::mkl::transpose)tmp;
+                trans[i] = (oneapi::math::transpose)tmp;
         }
-        left_right[i] = (oneapi::mkl::side)(std::rand() % 2);
-        upper_lower[i] = (oneapi::mkl::uplo)(std::rand() % 2);
-        unit_nonunit[i] = (oneapi::mkl::diag)(std::rand() % 2);
+        left_right[i] = (oneapi::math::side)(std::rand() % 2);
+        upper_lower[i] = (oneapi::math::uplo)(std::rand() % 2);
+        unit_nonunit[i] = (oneapi::math::diag)(std::rand() % 2);
 
         total_batch_count += group_size[i];
     }
@@ -137,16 +137,17 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     idx = 0;
     for (i = 0; i < group_count; i++) {
-        size_a = lda[i] * (left_right[i] == oneapi::mkl::side::left ? m[i] : n[i]);
-        Arank = left_right[i] == oneapi::mkl::side::left ? m[i] : n[i];
-        size_b = ldb[i] * ((layout == oneapi::mkl::layout::col_major) ? n[i] : m[i]);
+        size_a = lda[i] * (left_right[i] == oneapi::math::side::left ? m[i] : n[i]);
+        Arank = left_right[i] == oneapi::math::side::left ? m[i] : n[i];
+        size_b = ldb[i] * ((layout == oneapi::math::layout::col_major) ? n[i] : m[i]);
         for (j = 0; j < group_size[i]; j++) {
-            a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
-            b_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
-            b_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
+            a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt);
+            b_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
+            b_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt);
             rand_trsm_matrix(a_array[idx], layout, trans[i], Arank, Arank, lda[i]);
-            rand_matrix(b_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldb[i]);
-            copy_matrix(b_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldb[i],
+            rand_matrix(b_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i],
+                        ldb[i]);
+            copy_matrix(b_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldb[i],
                         b_ref_array[idx]);
             idx++;
         }
@@ -154,40 +155,40 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 
     // Call reference TRSM_BATCH.
     using fp_ref = typename ref_type_info<fp>::type;
-    int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* ldb_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
-    int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count);
+    int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* ldb_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
+    int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count);
 
     CBLAS_TRANSPOSE* trans_ref =
-        (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
+        (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count);
     CBLAS_SIDE* left_right_ref =
-        (CBLAS_SIDE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count);
+        (CBLAS_SIDE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count);
     CBLAS_UPLO* upper_lower_ref =
-        (CBLAS_UPLO*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count);
+        (CBLAS_UPLO*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count);
     CBLAS_DIAG* unit_nonunit_ref =
-        (CBLAS_DIAG*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count);
+        (CBLAS_DIAG*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count);
 
     if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (ldb_ref == NULL) ||
         (trans_ref == NULL) || (left_right_ref == NULL) || (upper_lower_ref == NULL) ||
         (unit_nonunit_ref == NULL) || (group_size_ref == NULL)) {
         std::cout << "Error cannot allocate input arrays\n";
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldb_ref);
-        oneapi::mkl::aligned_free(trans_ref);
-        oneapi::mkl::aligned_free(left_right_ref);
-        oneapi::mkl::aligned_free(upper_lower_ref);
-        oneapi::mkl::aligned_free(unit_nonunit_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldb_ref);
+        oneapi::math::aligned_free(trans_ref);
+        oneapi::math::aligned_free(left_right_ref);
+        oneapi::math::aligned_free(upper_lower_ref);
+        oneapi::math::aligned_free(unit_nonunit_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(b_array[idx], cxt);
-                oneapi::mkl::free_shared(b_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(b_array[idx], cxt);
+                oneapi::math::free_shared(b_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -218,14 +219,14 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trsm_batch(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trsm_batch(
                     main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0],
                     &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0],
                     group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trsm_batch(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trsm_batch(
                     main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0],
                     &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0],
                     group_count, &group_size[0], dependencies);
@@ -235,15 +236,15 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm_batch,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm_batch,
                                         &left_right[0], &upper_lower[0], &trans[0],
                                         &unit_nonunit[0], &m[0], &n[0], &alpha[0],
                                         (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0],
                                         group_count, &group_size[0], dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm_batch,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm_batch,
                                         &left_right[0], &upper_lower[0], &trans[0],
                                         &unit_nonunit[0], &m[0], &n[0], &alpha[0],
                                         (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0],
@@ -260,22 +261,22 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
-        oneapi::mkl::aligned_free(m_ref);
-        oneapi::mkl::aligned_free(n_ref);
-        oneapi::mkl::aligned_free(lda_ref);
-        oneapi::mkl::aligned_free(ldb_ref);
-        oneapi::mkl::aligned_free(trans_ref);
-        oneapi::mkl::aligned_free(left_right_ref);
-        oneapi::mkl::aligned_free(upper_lower_ref);
-        oneapi::mkl::aligned_free(unit_nonunit_ref);
-        oneapi::mkl::aligned_free(group_size_ref);
+    catch (const oneapi::math::unimplemented& e) {
+        oneapi::math::aligned_free(m_ref);
+        oneapi::math::aligned_free(n_ref);
+        oneapi::math::aligned_free(lda_ref);
+        oneapi::math::aligned_free(ldb_ref);
+        oneapi::math::aligned_free(trans_ref);
+        oneapi::math::aligned_free(left_right_ref);
+        oneapi::math::aligned_free(upper_lower_ref);
+        oneapi::math::aligned_free(unit_nonunit_ref);
+        oneapi::math::aligned_free(group_size_ref);
         idx = 0;
         for (i = 0; i < group_count; i++) {
             for (j = 0; j < group_size[i]; j++) {
-                oneapi::mkl::free_shared(a_array[idx], cxt);
-                oneapi::mkl::free_shared(b_array[idx], cxt);
-                oneapi::mkl::free_shared(b_ref_array[idx], cxt);
+                oneapi::math::free_shared(a_array[idx], cxt);
+                oneapi::math::free_shared(b_array[idx], cxt);
+                oneapi::math::free_shared(b_ref_array[idx], cxt);
                 idx++;
             }
         }
@@ -296,21 +297,21 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
             idx++;
         }
     }
-    oneapi::mkl::aligned_free(m_ref);
-    oneapi::mkl::aligned_free(n_ref);
-    oneapi::mkl::aligned_free(lda_ref);
-    oneapi::mkl::aligned_free(ldb_ref);
-    oneapi::mkl::aligned_free(trans_ref);
-    oneapi::mkl::aligned_free(left_right_ref);
-    oneapi::mkl::aligned_free(upper_lower_ref);
-    oneapi::mkl::aligned_free(unit_nonunit_ref);
-    oneapi::mkl::aligned_free(group_size_ref);
+    oneapi::math::aligned_free(m_ref);
+    oneapi::math::aligned_free(n_ref);
+    oneapi::math::aligned_free(lda_ref);
+    oneapi::math::aligned_free(ldb_ref);
+    oneapi::math::aligned_free(trans_ref);
+    oneapi::math::aligned_free(left_right_ref);
+    oneapi::math::aligned_free(upper_lower_ref);
+    oneapi::math::aligned_free(unit_nonunit_ref);
+    oneapi::math::aligned_free(group_size_ref);
     idx = 0;
     for (i = 0; i < group_count; i++) {
         for (j = 0; j < group_size[i]; j++) {
-            oneapi::mkl::free_shared(a_array[idx], cxt);
-            oneapi::mkl::free_shared(b_array[idx], cxt);
-            oneapi::mkl::free_shared(b_ref_array[idx], cxt);
+            oneapi::math::free_shared(a_array[idx], cxt);
+            oneapi::math::free_shared(b_array[idx], cxt);
+            oneapi::math::free_shared(b_ref_array[idx], cxt);
             idx++;
         }
     }
@@ -319,7 +320,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) {
 }
 
 class TrsmBatchUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrsmBatchUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 5));
@@ -345,8 +346,8 @@ TEST_P(TrsmBatchUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(TrsmBatchUsmTestSuite, TrsmBatchUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/CMakeLists.txt b/tests/unit_tests/blas/extensions/CMakeLists.txt
index 47af93e96..e9e3b6ac5 100644
--- a/tests/unit_tests/blas/extensions/CMakeLists.txt
+++ b/tests/unit_tests/blas/extensions/CMakeLists.txt
@@ -42,7 +42,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET blas_extensions_rt SOURCES ${EXTENSIONS_SOURCES})
   else()
-    target_link_libraries(blas_extensions_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(blas_extensions_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -59,5 +59,5 @@ target_include_directories(blas_extensions_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET blas_extensions_ct  SOURCES ${EXTENSIONS_SOURCES})
 else()
-  target_link_libraries(blas_extensions_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(blas_extensions_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
diff --git a/tests/unit_tests/blas/extensions/gemm_bias.cpp b/tests/unit_tests/blas/extensions/gemm_bias.cpp
index c6e99e829..012208587 100644
--- a/tests/unit_tests/blas/extensions/gemm_bias.cpp
+++ b/tests/unit_tests/blas/extensions/gemm_bias.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,8 +48,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ts, typename Ta, typename Tb, typename Tc>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
-         oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, int m, int n, int k, int lda,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa,
+         oneapi::math::transpose transb, oneapi::math::offset offsetc, int m, int n, int k, int lda,
          int ldb, int ldc, Ts alpha, Ts beta) {
     // Prepare data.
     vector<Ta, allocator_helper<Ta, 64>> A;
@@ -61,13 +61,16 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 
     rand_matrix(A, layout, transa, m, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
-    if (offsetc == oneapi::mkl::offset::fix)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, 1, 1, 1);
-    if (offsetc == oneapi::mkl::offset::column)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, m, 1, m);
-    if (offsetc == oneapi::mkl::offset::row)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, n, 1, n);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
+    if (offsetc == oneapi::math::offset::fix)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1,
+                    1);
+    if (offsetc == oneapi::math::offset::column)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1,
+                    m);
+    if (offsetc == oneapi::math::offset::row)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1,
+                    n);
 
     C_ref = C;
 
@@ -112,28 +115,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemm_bias(main_queue, transa, transb, offsetc, m,
-                                                           n, k, alpha, A_buffer, lda, ao, B_buffer,
-                                                           ldb, bo, beta, C_buffer, ldc, CO_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemm_bias(
+                    main_queue, transa, transb, offsetc, m, n, k, alpha, A_buffer, lda, ao,
+                    B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemm_bias(main_queue, transa, transb, offsetc, m, n,
-                                                        k, alpha, A_buffer, lda, ao, B_buffer, ldb,
-                                                        bo, beta, C_buffer, ldc, CO_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemm_bias(main_queue, transa, transb, offsetc, m, n,
+                                                         k, alpha, A_buffer, lda, ao, B_buffer, ldb,
+                                                         bo, beta, C_buffer, ldc, CO_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_bias,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_bias,
                                         transa, transb, offsetc, m, n, k, alpha, A_buffer, lda, ao,
                                         B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_bias, transa,
-                                        transb, offsetc, m, n, k, alpha, A_buffer, lda, ao,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias,
+                                        transa, transb, offsetc, m, n, k, alpha, A_buffer, lda, ao,
                                         B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer);
                 break;
             default: break;
@@ -146,7 +149,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -162,58 +165,58 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 }
 
 class GemmBiasTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmBiasTests, Int8Int8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -221,52 +224,52 @@ TEST_P(GemmBiasTests, Int8Uint8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -274,52 +277,52 @@ TEST_P(GemmBiasTests, Uint8Int8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -327,59 +330,59 @@ TEST_P(GemmBiasTests, Uint8Uint8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmBiasTestSuite, GemmBiasTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp b/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp
index 908eed909..4c10aef2d 100644
--- a/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp
+++ b/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,8 +48,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ts, typename Ta, typename Tb, typename Tc>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
-         oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, int m, int n, int k, int lda,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa,
+         oneapi::math::transpose transb, oneapi::math::offset offsetc, int m, int n, int k, int lda,
          int ldb, int ldc, Ts alpha, Ts beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -83,13 +83,16 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 
     rand_matrix(A, layout, transa, m, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
-    if (offsetc == oneapi::mkl::offset::fix)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, 1, 1, 1);
-    if (offsetc == oneapi::mkl::offset::column)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, m, 1, m);
-    if (offsetc == oneapi::mkl::offset::row)
-        rand_matrix(co, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, n, 1, n);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
+    if (offsetc == oneapi::math::offset::fix)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1,
+                    1);
+    if (offsetc == oneapi::math::offset::column)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1,
+                    m);
+    if (offsetc == oneapi::math::offset::row)
+        rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1,
+                    n);
 
     C_ref.resize(C.size());
     for (int i = 0; i < C.size(); i++)
@@ -115,13 +118,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemm_bias(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemm_bias(
                     main_queue, transa, transb, offsetc, m, n, k, alpha, A.data(), lda, ao,
                     B.data(), ldb, bo, beta, C.data(), ldc, co.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemm_bias(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemm_bias(
                     main_queue, transa, transb, offsetc, m, n, k, alpha, A.data(), lda, ao,
                     B.data(), ldb, bo, beta, C.data(), ldc, co.data(), dependencies);
                 break;
@@ -130,15 +133,15 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_bias,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_bias,
                                         transa, transb, offsetc, m, n, k, alpha, A.data(), lda, ao,
                                         B.data(), ldb, bo, beta, C.data(), ldc, co.data(),
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_bias, transa,
-                                        transb, offsetc, m, n, k, alpha, A.data(), lda, ao,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias,
+                                        transa, transb, offsetc, m, n, k, alpha, A.data(), lda, ao,
                                         B.data(), ldb, bo, beta, C.data(), ldc, co.data(),
                                         dependencies);
                 break;
@@ -153,7 +156,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -168,58 +171,58 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 }
 
 class GemmBiasUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmBiasUsmTests, Int8Int8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -227,52 +230,52 @@ TEST_P(GemmBiasUsmTests, Int8Uint8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, int8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -280,52 +283,52 @@ TEST_P(GemmBiasUsmTests, Uint8Int8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, int8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
@@ -333,59 +336,59 @@ TEST_P(GemmBiasUsmTests, Uint8Uint8Int32Precision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::fix, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::column, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::column, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106,
         alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, uint8_t, uint8_t, int32_t>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, oneapi::mkl::offset::row, 79, 83, 91, 103, 105, 106, alpha,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, oneapi::math::offset::row, 79, 83, 91, 103, 105, 106, alpha,
         beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmBiasUsmTestSuite, GemmBiasUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/gemmt.cpp b/tests/unit_tests/blas/extensions/gemmt.cpp
index 228a85d33..f1fe54791 100644
--- a/tests/unit_tests/blas/extensions/gemmt.cpp
+++ b/tests/unit_tests/blas/extensions/gemmt.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,14 +48,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int n, int k, int lda,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::transpose transb, int n, int k, int lda,
          int ldb, int ldc, fp alpha, fp beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, C, C_ref;
     rand_matrix(A, layout, transa, n, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     C_ref = C;
 
     // Call Reference GEMMT.
@@ -94,27 +94,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemmt(main_queue, upper_lower, transa, transb, n,
-                                                       k, alpha, A_buffer, lda, B_buffer, ldb, beta,
-                                                       C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemmt(main_queue, upper_lower, transa, transb, n,
+                                                        k, alpha, A_buffer, lda, B_buffer, ldb,
+                                                        beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemmt(main_queue, upper_lower, transa, transb, n, k,
-                                                    alpha, A_buffer, lda, B_buffer, ldb, beta,
-                                                    C_buffer, ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemmt(main_queue, upper_lower, transa, transb, n, k,
+                                                     alpha, A_buffer, lda, B_buffer, ldb, beta,
+                                                     C_buffer, ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemmt,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemmt,
                                         upper_lower, transa, transb, n, k, alpha, A_buffer, lda,
                                         B_buffer, ldb, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemmt,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemmt,
                                         upper_lower, transa, transb, n, k, alpha, A_buffer, lda,
                                         B_buffer, ldb, beta, C_buffer, ldc);
                 break;
@@ -127,7 +127,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -143,43 +143,43 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class GemmtTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class GemmtTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmtTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
 }
 
@@ -189,36 +189,36 @@ TEST_P(GemmtTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
 }
 
@@ -226,77 +226,77 @@ TEST_P(GemmtTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0);
     std::complex<float> beta(3.0);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
 }
 
 TEST_P(GemmtTests, ComplexDoublePrecision) {
@@ -305,83 +305,83 @@ TEST_P(GemmtTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0);
     std::complex<double> beta(3.0);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmtTestSuite, GemmtTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/gemmt_usm.cpp b/tests/unit_tests/blas/extensions/gemmt_usm.cpp
index dac300ae2..3674fe634 100644
--- a/tests/unit_tests/blas/extensions/gemmt_usm.cpp
+++ b/tests/unit_tests/blas/extensions/gemmt_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int n, int k, int lda,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::transpose transb, int n, int k, int lda,
          int ldb, int ldc, fp alpha, fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> A(ua), B(ua), C(ua);
     rand_matrix(A, layout, transa, n, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
 
     auto C_ref = C;
 
@@ -94,28 +94,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemmt(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemmt(
                     main_queue, upper_lower, transa, transb, n, k, alpha, A.data(), lda, B.data(),
                     ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemmt(main_queue, upper_lower, transa, transb,
-                                                           n, k, alpha, A.data(), lda, B.data(),
-                                                           ldb, beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemmt(main_queue, upper_lower, transa, transb,
+                                                            n, k, alpha, A.data(), lda, B.data(),
+                                                            ldb, beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemmt,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemmt,
                                         upper_lower, transa, transb, n, k, alpha, A.data(), lda,
                                         B.data(), ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemmt,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemmt,
                                         upper_lower, transa, transb, n, k, alpha, A.data(), lda,
                                         B.data(), ldb, beta, C.data(), ldc, dependencies);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -144,42 +144,42 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class GemmtUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmtUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                   beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                   beta));
 }
 
@@ -189,36 +189,36 @@ TEST_P(GemmtUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, alpha,
                                    beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha,
                                    beta));
 }
 
@@ -226,77 +226,77 @@ TEST_P(GemmtUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0);
     std::complex<float> beta(3.0);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
 }
 
 TEST_P(GemmtUsmTests, ComplexDoublePrecision) {
@@ -305,83 +305,83 @@ TEST_P(GemmtUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0);
     std::complex<double> beta(3.0);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103, alpha,
-        beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, 27, 98, 101, 102, 103,
-        alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::trans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103,
         alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::conjtrans, 27, 98, 101, 102, 103,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102,
+        103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103,
         alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102,
+        103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmtUsmTestSuite, GemmtUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/imatcopy.cpp b/tests/unit_tests/blas/extensions/imatcopy.cpp
index ba9400817..f6480ee16 100644
--- a/tests/unit_tests/blas/extensions/imatcopy.cpp
+++ b/tests/unit_tests/blas/extensions/imatcopy.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -65,13 +65,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     int64_t size_a, size_b, size;
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -79,10 +79,10 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     vector<fp, allocator_helper<fp, 64>> AB(size), AB_ref(size);
 
-    rand_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size, 1,
+    rand_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1,
                 size);
-    copy_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size, 1, size,
-                AB_ref);
+    copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1,
+                size, AB_ref);
 
     // Call reference IMATCOPY.
     int m_ref = (int)m;
@@ -114,24 +114,24 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::imatcopy(main_queue, trans, m, n, alpha, AB_buffer,
-                                                          lda, ldb);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::imatcopy(main_queue, trans, m, n, alpha,
+                                                           AB_buffer, lda, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::imatcopy(main_queue, trans, m, n, alpha, AB_buffer,
-                                                       lda, ldb);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::imatcopy(main_queue, trans, m, n, alpha, AB_buffer,
+                                                        lda, ldb);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::imatcopy,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::imatcopy,
                                         trans, m, n, alpha, AB_buffer, lda, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::imatcopy, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy, trans,
                                         m, n, alpha, AB_buffer, lda, ldb);
                 break;
             default: break;
@@ -144,7 +144,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -155,14 +155,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto AB_accessor = AB_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(AB_accessor, AB_ref, oneapi::mkl::layout::col_major, size, 1,
+    bool good = check_equal_matrix(AB_accessor, AB_ref, oneapi::math::layout::col_major, size, 1,
                                    size, 10, std::cout);
 
     return (int)good;
 }
 
 class ImatcopyTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ImatcopyTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -186,8 +186,8 @@ TEST_P(ImatcopyTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ImatcopyTestSuite, ImatcopyTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp
index 1acf4ecaf..dd98e97fa 100644
--- a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp
+++ b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i;
 
@@ -85,13 +85,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     int64_t size_a, size_b, size;
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -103,10 +103,10 @@ int test(device* dev, oneapi::mkl::layout layout) {
     AB.resize(size);
     AB_ref.resize(size);
 
-    rand_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size, 1,
+    rand_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1,
                 size);
-    copy_matrix(AB, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size, 1, size,
-                AB_ref);
+    copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1,
+                size, AB_ref);
 
     // Call reference IMATCOPY.
     int m_ref = (int)m;
@@ -119,25 +119,25 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::imatcopy(main_queue, trans, m, n, alpha,
-                                                                 &AB[0], lda, ldb, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::imatcopy(main_queue, trans, m, n, alpha,
+                                                                  &AB[0], lda, ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::imatcopy(main_queue, trans, m, n, alpha,
-                                                              &AB[0], lda, ldb, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::imatcopy(main_queue, trans, m, n, alpha,
+                                                               &AB[0], lda, ldb, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::imatcopy,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::imatcopy,
                                         trans, m, n, alpha, &AB[0], lda, ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::imatcopy, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy, trans,
                                         m, n, alpha, &AB[0], lda, ldb, dependencies);
                 break;
             default: break;
@@ -151,7 +151,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -160,14 +160,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(AB, AB_ref, oneapi::mkl::layout::col_major, size, 1, size, 10,
+    bool good = check_equal_matrix(AB, AB_ref, oneapi::math::layout::col_major, size, 1, size, 10,
                                    std::cout);
 
     return (int)good;
 }
 
 class ImatcopyUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ImatcopyUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -191,8 +191,8 @@ TEST_P(ImatcopyUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ImatcopyUsmTestSuite, ImatcopyUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatadd.cpp b/tests/unit_tests/blas/extensions/omatadd.cpp
index 7e76f74f9..cb404cc34 100644
--- a/tests/unit_tests/blas/extensions/omatadd.cpp
+++ b/tests/unit_tests/blas/extensions/omatadd.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -69,14 +69,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     int64_t size_a, size_b, size_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            size_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * n : lda * m;
-            size_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+        case oneapi::math::layout::col_major:
+            size_a = (transa == oneapi::math::transpose::nontrans) ? lda * n : lda * m;
+            size_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             size_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            size_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * n;
-            size_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+        case oneapi::math::layout::row_major:
+            size_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * n;
+            size_b = (transb == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             size_c = ldc * m;
             break;
         default: break;
@@ -84,14 +84,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     vector<fp, allocator_helper<fp, 64>> A(size_a), B(size_b), C(size_c), C_ref(size_c);
 
-    rand_matrix(A.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_a,
-                1, size_a);
-    rand_matrix(B.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b,
-                1, size_b);
-    rand_matrix(C.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_c,
-                1, size_c);
-    copy_matrix(C.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_c,
-                1, size_c, C_ref.data());
+    rand_matrix(A.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
+                size_a, 1, size_a);
+    rand_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
+                size_b, 1, size_b);
+    rand_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
+                size_c, 1, size_c);
+    copy_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
+                size_c, 1, size_c, C_ref.data());
 
     // Call reference OMATADD.
     int m_ref = (int)m;
@@ -127,27 +127,27 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::omatadd(main_queue, transa, transb, m, n, alpha,
-                                                         A_buffer, lda, beta, B_buffer, ldb,
-                                                         C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::omatadd(main_queue, transa, transb, m, n, alpha,
+                                                          A_buffer, lda, beta, B_buffer, ldb,
+                                                          C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::omatadd(main_queue, transa, transb, m, n, alpha,
-                                                      A_buffer, lda, beta, B_buffer, ldb, C_buffer,
-                                                      ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::omatadd(main_queue, transa, transb, m, n, alpha,
+                                                       A_buffer, lda, beta, B_buffer, ldb, C_buffer,
+                                                       ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatadd,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatadd,
                                         transa, transb, m, n, alpha, A_buffer, lda, beta, B_buffer,
                                         ldb, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatadd, transa,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatadd, transa,
                                         transb, m, n, alpha, A_buffer, lda, beta, B_buffer, ldb,
                                         C_buffer, ldc);
                 break;
@@ -160,7 +160,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -171,14 +171,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto C_accessor = C_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(C_accessor, C_ref, oneapi::mkl::layout::col_major, size_c, 1,
+    bool good = check_equal_matrix(C_accessor, C_ref, oneapi::math::layout::col_major, size_c, 1,
                                    size_c, 10, std::cout);
 
     return (int)good;
 }
 
 class OmataddTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmataddTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -202,8 +202,8 @@ TEST_P(OmataddTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmataddTestSuite, OmataddTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatadd_usm.cpp b/tests/unit_tests/blas/extensions/omatadd_usm.cpp
index eff40ae8d..472c693c8 100644
--- a/tests/unit_tests/blas/extensions/omatadd_usm.cpp
+++ b/tests/unit_tests/blas/extensions/omatadd_usm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,7 +48,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -71,7 +71,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb, ldc;
-    oneapi::mkl::transpose transa, transb;
+    oneapi::math::transpose transa, transb;
     fp alpha, beta;
     int64_t i, tmp;
 
@@ -88,14 +88,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     int64_t size_a, size_b, size_c;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
-            size_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * n : lda * m;
-            size_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+        case oneapi::math::layout::col_major:
+            size_a = (transa == oneapi::math::transpose::nontrans) ? lda * n : lda * m;
+            size_b = (transb == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             size_c = ldc * n;
             break;
-        case oneapi::mkl::layout::row_major:
-            size_a = (transa == oneapi::mkl::transpose::nontrans) ? lda * m : lda * n;
-            size_b = (transb == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+        case oneapi::math::layout::row_major:
+            size_a = (transa == oneapi::math::transpose::nontrans) ? lda * m : lda * n;
+            size_b = (transb == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             size_c = ldc * m;
             break;
         default: break;
@@ -109,13 +109,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
     C.resize(size_c);
     C_ref.resize(size_c);
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_a, 1,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_a, 1,
                 size_a);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b, 1,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, 1,
                 size_b);
-    rand_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_c, 1,
+    rand_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_c, 1,
                 size_c);
-    copy_matrix(C, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_c, 1,
+    copy_matrix(C, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_c, 1,
                 size_c, C_ref);
 
     // Call reference OMATADD.
@@ -131,28 +131,28 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatadd(main_queue, transa, transb, m, n,
-                                                                alpha, &A[0], lda, beta, &B[0], ldb,
-                                                                &C[0], ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatadd(main_queue, transa, transb, m, n,
+                                                                 alpha, &A[0], lda, beta, &B[0],
+                                                                 ldb, &C[0], ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatadd(main_queue, transa, transb, m, n,
-                                                             alpha, &A[0], lda, beta, &B[0], ldb,
-                                                             &C[0], ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatadd(main_queue, transa, transb, m, n,
+                                                              alpha, &A[0], lda, beta, &B[0], ldb,
+                                                              &C[0], ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatadd,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatadd,
                                         transa, transb, m, n, alpha, &A[0], lda, beta, &B[0], ldb,
                                         &C[0], ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatadd, transa,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatadd, transa,
                                         transb, m, n, alpha, &A[0], lda, beta, &B[0], ldb, &C[0],
                                         ldc, dependencies);
                 break;
@@ -166,7 +166,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -175,14 +175,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(C, C_ref, oneapi::mkl::layout::col_major, size_c, 1, size_c, 10,
+    bool good = check_equal_matrix(C, C_ref, oneapi::math::layout::col_major, size_c, 1, size_c, 10,
                                    std::cout);
 
     return (int)good;
 }
 
 class OmataddUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmataddUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -206,8 +206,8 @@ TEST_P(OmataddUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmataddUsmTestSuite, OmataddUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatcopy.cpp b/tests/unit_tests/blas/extensions/omatcopy.cpp
index 1ba35d057..27aeb0739 100644
--- a/tests/unit_tests/blas/extensions/omatcopy.cpp
+++ b/tests/unit_tests/blas/extensions/omatcopy.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,11 +48,11 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -63,33 +63,33 @@ int test(device* dev, oneapi::mkl::layout layout) {
     alpha = rand_scalar<fp>();
 
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (oneapi::mkl::transpose)(std::rand() % 2);
+        trans = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            trans = oneapi::mkl::transpose::conjtrans;
+            trans = oneapi::math::transpose::conjtrans;
         else
-            trans = (oneapi::mkl::transpose)tmp;
+            trans = (oneapi::math::transpose)tmp;
     }
 
     int64_t size_a, size_b;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
 
     vector<fp, allocator_helper<fp, 64>> A(size_a), B(size_b), B_ref(size_b);
 
-    rand_matrix(A.data(), layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A.data(), layout, oneapi::math::transpose::nontrans, m, n, lda);
     rand_matrix(B.data(), layout, trans, m, n, ldb);
 
     // Call reference OMATCOPY.
@@ -123,24 +123,24 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer,
-                                                          lda, B_buffer, ldb);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer,
+                                                           lda, B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer,
-                                                       lda, B_buffer, ldb);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer,
+                                                        lda, B_buffer, ldb);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy,
                                         trans, m, n, alpha, A_buffer, lda, B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy, trans,
                                         m, n, alpha, A_buffer, lda, B_buffer, ldb);
                 break;
             default: break;
@@ -153,7 +153,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -164,14 +164,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto B_accessor = B_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::mkl::layout::col_major, size_b, 1,
+    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::math::layout::col_major, size_b, 1,
                                    size_b, 10, std::cout);
 
     return (int)good;
 }
 
 class OmatcopyTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmatcopyTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -195,8 +195,8 @@ TEST_P(OmatcopyTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmatcopyTestSuite, OmatcopyTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatcopy2.cpp b/tests/unit_tests/blas/extensions/omatcopy2.cpp
index 3bc7dfccb..1829f7e77 100644
--- a/tests/unit_tests/blas/extensions/omatcopy2.cpp
+++ b/tests/unit_tests/blas/extensions/omatcopy2.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,12 +48,12 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
     int64_t stride_a, stride_b;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
 
     stride_a = 1 + std::rand() % 50;
@@ -68,23 +68,23 @@ int test(device* dev, oneapi::mkl::layout layout) {
     int64_t size_a, size_b;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
 
     vector<fp, allocator_helper<fp, 64>> A(size_a), B(size_b), B_ref(size_b);
 
-    rand_matrix(A.data(), layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A.data(), layout, oneapi::math::transpose::nontrans, m, n, lda);
     rand_matrix(B.data(), layout, trans, m, n, ldb);
-    copy_matrix(B.data(), oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b,
-                1, size_b, B_ref.data());
+    copy_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans,
+                size_b, 1, size_b, B_ref.data());
 
     // Call reference OMATCOPY2.
     int64_t m_ref = m;
@@ -120,25 +120,26 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha, A_buffer,
-                                                           lda, stride_a, B_buffer, ldb, stride_b);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha,
+                                                            A_buffer, lda, stride_a, B_buffer, ldb,
+                                                            stride_b);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha, A_buffer,
-                                                        lda, stride_a, B_buffer, ldb, stride_b);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha, A_buffer,
+                                                         lda, stride_a, B_buffer, ldb, stride_b);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy2,
                                         trans, m, n, alpha, A_buffer, lda, stride_a, B_buffer, ldb,
                                         stride_b);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy2, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy2, trans,
                                         m, n, alpha, A_buffer, lda, stride_a, B_buffer, ldb,
                                         stride_b);
                 break;
@@ -152,7 +153,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -163,14 +164,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     auto B_accessor = B_buffer.get_host_access(read_only);
-    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::mkl::layout::col_major, size_b, 1,
+    bool good = check_equal_matrix(B_accessor, B_ref, oneapi::math::layout::col_major, size_b, 1,
                                    size_b, 10, std::cout);
 
     return (int)good;
 }
 
 class Omatcopy2Tests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Omatcopy2Tests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -194,8 +195,8 @@ TEST_P(Omatcopy2Tests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(Omatcopy2TestSuite, Omatcopy2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp
index 3dcf87dc1..59a8955df 100644
--- a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp
+++ b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
     int64_t m, n;
     int64_t lda, ldb;
     int64_t stride_a, stride_b;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
 
     stride_a = 1 + std::rand() % 50;
@@ -88,13 +88,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
     int64_t size_a, size_b;
 
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -106,11 +106,11 @@ int test(device* dev, oneapi::mkl::layout layout) {
     B.resize(size_b);
     B_ref.resize(size_b);
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_a, 1,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_a, 1,
                 size_a);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b, 1,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, 1,
                 size_b);
-    copy_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b, 1,
+    copy_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, 1,
                 size_b, B_ref);
 
     // Call reference OMATCOPY2.
@@ -127,28 +127,28 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha,
-                                                                  &A[0], lda, stride_a, &B[0], ldb,
-                                                                  stride_b, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha,
+                                                                   &A[0], lda, stride_a, &B[0], ldb,
+                                                                   stride_b, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha,
-                                                               &A[0], lda, stride_a, &B[0], ldb,
-                                                               stride_b, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha,
+                                                                &A[0], lda, stride_a, &B[0], ldb,
+                                                                stride_b, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy2,
                                         trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb,
                                         stride_b, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy2, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy2, trans,
                                         m, n, alpha, &A[0], lda, stride_a, &B[0], ldb, stride_b,
                                         dependencies);
                 break;
@@ -163,7 +163,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -172,14 +172,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(B, B_ref, oneapi::mkl::layout::col_major, size_b, 1, size_b, 10,
+    bool good = check_equal_matrix(B, B_ref, oneapi::math::layout::col_major, size_b, 1, size_b, 10,
                                    std::cout);
 
     return (int)good;
 }
 
 class Omatcopy2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Omatcopy2UsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -203,8 +203,8 @@ TEST_P(Omatcopy2UsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(Omatcopy2UsmTestSuite, Omatcopy2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp
index b217e2f54..d3b6be7fe 100644
--- a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp
+++ b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp
@@ -32,9 +32,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -49,7 +49,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
     // Prepare data.
     int64_t m, n;
     int64_t lda, ldb;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     fp alpha;
     int64_t i, tmp;
 
@@ -85,13 +85,13 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     int64_t size_a, size_b;
     switch (layout) {
-        case oneapi::mkl::layout::col_major:
+        case oneapi::math::layout::col_major:
             size_a = lda * n;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * n : ldb * m;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * n : ldb * m;
             break;
-        case oneapi::mkl::layout::row_major:
+        case oneapi::math::layout::row_major:
             size_a = lda * m;
-            size_b = (trans == oneapi::mkl::transpose::nontrans) ? ldb * m : ldb * n;
+            size_b = (trans == oneapi::math::transpose::nontrans) ? ldb * m : ldb * n;
             break;
         default: break;
     }
@@ -103,11 +103,11 @@ int test(device* dev, oneapi::mkl::layout layout) {
     B.resize(size_b);
     B_ref.resize(size_b);
 
-    rand_matrix(A, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_a, 1,
+    rand_matrix(A, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_a, 1,
                 size_a);
-    rand_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b, 1,
+    rand_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, 1,
                 size_b);
-    copy_matrix(B, oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_b, 1,
+    copy_matrix(B, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, 1,
                 size_b, B_ref);
 
     // Call reference OMATCOPY.
@@ -121,25 +121,25 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::omatcopy(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::omatcopy(
                     main_queue, trans, m, n, alpha, &A[0], lda, &B[0], ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::omatcopy(main_queue, trans, m, n, alpha, &A[0],
-                                                              lda, &B[0], ldb, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::omatcopy(
+                    main_queue, trans, m, n, alpha, &A[0], lda, &B[0], ldb, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy,
                                         trans, m, n, alpha, &A[0], lda, &B[0], ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy, trans,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy, trans,
                                         m, n, alpha, &A[0], lda, &B[0], ldb, dependencies);
                 break;
             default: break;
@@ -153,7 +153,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -162,14 +162,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
     }
 
     // Compare the results of reference implementation and DPC++ implementation.
-    bool good = check_equal_matrix(B, B_ref, oneapi::mkl::layout::col_major, size_b, 1, size_b, 10,
+    bool good = check_equal_matrix(B, B_ref, oneapi::math::layout::col_major, size_b, 1, size_b, 10,
                                    std::cout);
 
     return (int)good;
 }
 
 class OmatcopyUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(OmatcopyUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -193,8 +193,8 @@ TEST_P(OmatcopyUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(OmatcopyUsmTestSuite, OmatcopyUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/include/allocator_helper.hpp b/tests/unit_tests/blas/include/allocator_helper.hpp
index 79fd22254..6254ad26c 100644
--- a/tests/unit_tests/blas/include/allocator_helper.hpp
+++ b/tests/unit_tests/blas/include/allocator_helper.hpp
@@ -48,7 +48,7 @@ struct allocator_helper {
     allocator_helper(allocator_helper<U, align2>&& other) noexcept {}
 
     T* allocate(size_t n) {
-        void* mem = oneapi::mkl::aligned_alloc(align, n * sizeof(T));
+        void* mem = oneapi::math::aligned_alloc(align, n * sizeof(T));
         if (!mem)
             throw std::bad_alloc();
 
@@ -56,7 +56,7 @@ struct allocator_helper {
     }
 
     void deallocate(T* p, size_t n) noexcept {
-        oneapi::mkl::aligned_free(p);
+        oneapi::math::aligned_free(p);
     }
 
     constexpr size_t max_size() const noexcept {
diff --git a/tests/unit_tests/blas/include/onemkl_blas_helper.hpp b/tests/unit_tests/blas/include/onemath_blas_helper.hpp
similarity index 59%
rename from tests/unit_tests/blas/include/onemkl_blas_helper.hpp
rename to tests/unit_tests/blas/include/onemath_blas_helper.hpp
index 5489aaa61..f258e55b9 100644
--- a/tests/unit_tests/blas/include/onemkl_blas_helper.hpp
+++ b/tests/unit_tests/blas/include/onemath_blas_helper.hpp
@@ -17,53 +17,53 @@
 * SPDX-License-Identifier: Apache-2.0
 *******************************************************************************/
 
-#ifndef ONEMKL_BLAS_HELPER_HPP
-#define ONEMKL_BLAS_HELPER_HPP
+#ifndef ONEMATH_BLAS_HELPER_HPP
+#define ONEMATH_BLAS_HELPER_HPP
 
 #include "cblas.h"
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 typedef enum { CblasRowOffset = 101, CblasColOffset = 102, CblasFixOffset = 103 } CBLAS_OFFSET;
 
 /**
- * Helper methods for converting between onemkl types and their BLAS
+ * Helper methods for converting between onemath types and their CBLAS
  * equivalents.
  */
 
-inline CBLAS_TRANSPOSE convert_to_cblas_trans(oneapi::mkl::transpose trans) {
-    if (trans == oneapi::mkl::transpose::trans)
+inline CBLAS_TRANSPOSE convert_to_cblas_trans(oneapi::math::transpose trans) {
+    if (trans == oneapi::math::transpose::trans)
         return CBLAS_TRANSPOSE::CblasTrans;
-    else if (trans == oneapi::mkl::transpose::conjtrans)
+    else if (trans == oneapi::math::transpose::conjtrans)
         return CBLAS_TRANSPOSE::CblasConjTrans;
     else
         return CBLAS_TRANSPOSE::CblasNoTrans;
 }
 
-inline CBLAS_UPLO convert_to_cblas_uplo(oneapi::mkl::uplo is_upper) {
-    return is_upper == oneapi::mkl::uplo::upper ? CBLAS_UPLO::CblasUpper : CBLAS_UPLO::CblasLower;
+inline CBLAS_UPLO convert_to_cblas_uplo(oneapi::math::uplo is_upper) {
+    return is_upper == oneapi::math::uplo::upper ? CBLAS_UPLO::CblasUpper : CBLAS_UPLO::CblasLower;
 }
 
-inline CBLAS_DIAG convert_to_cblas_diag(oneapi::mkl::diag is_unit) {
-    return is_unit == oneapi::mkl::diag::unit ? CBLAS_DIAG::CblasUnit : CBLAS_DIAG::CblasNonUnit;
+inline CBLAS_DIAG convert_to_cblas_diag(oneapi::math::diag is_unit) {
+    return is_unit == oneapi::math::diag::unit ? CBLAS_DIAG::CblasUnit : CBLAS_DIAG::CblasNonUnit;
 }
 
-inline CBLAS_SIDE convert_to_cblas_side(oneapi::mkl::side is_left) {
-    return is_left == oneapi::mkl::side::left ? CBLAS_SIDE::CblasLeft : CBLAS_SIDE::CblasRight;
+inline CBLAS_SIDE convert_to_cblas_side(oneapi::math::side is_left) {
+    return is_left == oneapi::math::side::left ? CBLAS_SIDE::CblasLeft : CBLAS_SIDE::CblasRight;
 }
 
-inline CBLAS_OFFSET convert_to_cblas_offset(oneapi::mkl::offset offsetc) {
-    if (offsetc == oneapi::mkl::offset::fix)
+inline CBLAS_OFFSET convert_to_cblas_offset(oneapi::math::offset offsetc) {
+    if (offsetc == oneapi::math::offset::fix)
         return CBLAS_OFFSET::CblasFixOffset;
-    else if (offsetc == oneapi::mkl::offset::column)
+    else if (offsetc == oneapi::math::offset::column)
         return CBLAS_OFFSET::CblasColOffset;
     else
         return CBLAS_OFFSET::CblasRowOffset;
 }
 
-inline CBLAS_LAYOUT convert_to_cblas_layout(oneapi::mkl::layout is_column) {
-    return is_column == oneapi::mkl::layout::col_major ? CBLAS_LAYOUT::CblasColMajor
-                                                       : CBLAS_LAYOUT::CblasRowMajor;
+inline CBLAS_LAYOUT convert_to_cblas_layout(oneapi::math::layout is_column) {
+    return is_column == oneapi::math::layout::col_major ? CBLAS_LAYOUT::CblasColMajor
+                                                        : CBLAS_LAYOUT::CblasRowMajor;
 }
 
 static const CBLAS_TRANSPOSE fcblastrans[] = { CblasNoTrans, CblasTrans, CblasConjTrans };
@@ -82,4 +82,4 @@ static const CBLAS_TRANSPOSE fcblastrans_c[] = { CblasConjTrans, CblasNoTrans, C
 
 static const CBLAS_OFFSET fcblasoffset[] = { CblasColOffset, CblasRowOffset, CblasFixOffset };
 
-#endif // ONEMKL_BLAS_HELPER_HPP
+#endif // ONEMATH_BLAS_HELPER_HPP
diff --git a/tests/unit_tests/blas/include/reference_blas_templates.hpp b/tests/unit_tests/blas/include/reference_blas_templates.hpp
index de7e36d40..45263cc63 100644
--- a/tests/unit_tests/blas/include/reference_blas_templates.hpp
+++ b/tests/unit_tests/blas/include/reference_blas_templates.hpp
@@ -24,7 +24,7 @@
 #include <complex>
 #include <cstdint>
 #include "cblas.h"
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 #include "test_helper.hpp"
 #include "reference_blas_wrappers.hpp"
 
@@ -162,18 +162,18 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
         sizec = *ldc * *m;
     }
-    float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea);
-    float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb);
-    float* cf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec);
+    float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea);
+    float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb);
+    float* cf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizec);
     copy_mat(a, layout, transa, *m, *k, *lda, af);
     copy_mat(b, layout, transb, *k, *n, *ldb, bf);
     copy_mat(c, layout, CblasNoTrans, *m, *n, *ldc, cf);
     cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, alphaf, af, *lda, bf, *ldb, betaf, cf,
                         *ldc);
     copy_mat(cf, layout, CblasNoTrans, *m, *n, *ldc, c);
-    oneapi::mkl::aligned_free(af);
-    oneapi::mkl::aligned_free(bf);
-    oneapi::mkl::aligned_free(cf);
+    oneapi::math::aligned_free(af);
+    oneapi::math::aligned_free(bf);
+    oneapi::math::aligned_free(cf);
 }
 
 template <>
@@ -230,20 +230,20 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c
         sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k;
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
     }
-    float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea);
-    float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb);
+    float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea);
+    float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb);
     copy_mat(a, layout, transa, *m, *k, *lda, af);
     copy_mat(b, layout, transb, *k, *n, *ldb, bf);
     cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c,
                         *ldc);
-    oneapi::mkl::aligned_free(af);
-    oneapi::mkl::aligned_free(bf);
+    oneapi::math::aligned_free(af);
+    oneapi::math::aligned_free(bf);
 }
 
 template <>
 void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m,
-          const int* n, const int* k, const float* alpha, const oneapi::mkl::bfloat16* a,
-          const int* lda, const oneapi::mkl::bfloat16* b, const int* ldb, const float* beta,
+          const int* n, const int* k, const float* alpha, const oneapi::math::bfloat16* a,
+          const int* lda, const oneapi::math::bfloat16* b, const int* ldb, const float* beta,
           float* c, const int* ldc) {
     // Not supported in NETLIB. SGEMM is used as reference.
     int sizea, sizeb;
@@ -255,14 +255,14 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c
         sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k;
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
     }
-    float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea);
-    float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb);
+    float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea);
+    float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb);
     copy_mat(a, layout, transa, *m, *k, *lda, af);
     copy_mat(b, layout, transb, *k, *n, *ldb, bf);
     cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c,
                         *ldc);
-    oneapi::mkl::aligned_free(af);
-    oneapi::mkl::aligned_free(bf);
+    oneapi::math::aligned_free(af);
+    oneapi::math::aligned_free(bf);
 }
 
 template <typename fp>
@@ -1587,9 +1587,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
         sizec = *ldc * *m;
     }
-    double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea);
-    double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb);
-    double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec);
+    double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea);
+    double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb);
+    double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec);
     double alphad = *alpha;
     double betad = *beta;
     double aod = *ao;
@@ -1600,9 +1600,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
     cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, alphad, ad, *lda, bd, *ldb, betad, cd,
                         *ldc);
     copy_mat(cd, layout, *m, *n, *ldc, offsetc, co, c);
-    oneapi::mkl::aligned_free(ad);
-    oneapi::mkl::aligned_free(bd);
-    oneapi::mkl::aligned_free(cd);
+    oneapi::math::aligned_free(ad);
+    oneapi::math::aligned_free(bd);
+    oneapi::math::aligned_free(cd);
 }
 
 template <>
@@ -1623,9 +1623,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
         sizec = *ldc * *m;
     }
-    double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea);
-    double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb);
-    double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec);
+    double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea);
+    double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb);
+    double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec);
     double alphad = *alpha;
     double betad = *beta;
     double aod = *ao;
@@ -1636,9 +1636,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
     cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, alphad, ad, *lda, bd, *ldb, betad, cd,
                         *ldc);
     copy_mat(cd, layout, *m, *n, *ldc, offsetc, co, c);
-    oneapi::mkl::aligned_free(ad);
-    oneapi::mkl::aligned_free(bd);
-    oneapi::mkl::aligned_free(cd);
+    oneapi::math::aligned_free(ad);
+    oneapi::math::aligned_free(bd);
+    oneapi::math::aligned_free(cd);
 }
 
 template <>
@@ -1658,9 +1658,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
         sizec = *ldc * *m;
     }
-    double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea);
-    double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb);
-    double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec);
+    double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea);
+    double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb);
+    double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec);
     double alphad = *alpha;
     double betad = *beta;
     double aod = *ao;
@@ -1671,9 +1671,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
     cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, alphad, ad, *lda, bd, *ldb, betad, cd,
                         *ldc);
     copy_mat(cd, layout, *m, *n, *ldc, offsetc, co, c);
-    oneapi::mkl::aligned_free(ad);
-    oneapi::mkl::aligned_free(bd);
-    oneapi::mkl::aligned_free(cd);
+    oneapi::math::aligned_free(ad);
+    oneapi::math::aligned_free(bd);
+    oneapi::math::aligned_free(cd);
 }
 
 template <>
@@ -1694,9 +1694,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
         sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n;
         sizec = *ldc * *m;
     }
-    double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea);
-    double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb);
-    double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec);
+    double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea);
+    double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb);
+    double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec);
     double alphad = *alpha;
     double betad = *beta;
     double aod = *ao;
@@ -1707,9 +1707,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran
     cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, alphad, ad, *lda, bd, *ldb, betad, cd,
                         *ldc);
     copy_mat(cd, layout, *m, *n, *ldc, offsetc, co, c);
-    oneapi::mkl::aligned_free(ad);
-    oneapi::mkl::aligned_free(bd);
-    oneapi::mkl::aligned_free(cd);
+    oneapi::math::aligned_free(ad);
+    oneapi::math::aligned_free(bd);
+    oneapi::math::aligned_free(cd);
 }
 
 template <typename fp>
@@ -1726,12 +1726,12 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa,
     // Not supported in NETLIB. SGEMM is used as reference.
     int sizec;
     sizec = *ldc * *n;
-    float* cf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec);
+    float* cf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizec);
     update_c(c, layout, upper_lower, *n, *n, *ldc, cf);
     cblas_sgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf,
                         *ldc);
     update_c(cf, layout, upper_lower, *n, *n, *ldc, c);
-    oneapi::mkl::aligned_free(cf);
+    oneapi::math::aligned_free(cf);
 }
 
 template <>
@@ -1742,12 +1742,12 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa,
     // Not supported in NETLIB. DGEMM is used as reference.
     int sizec;
     sizec = *ldc * *n;
-    double* cf = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec);
+    double* cf = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec);
     update_c(c, layout, upper_lower, *n, *n, *ldc, cf);
     cblas_dgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf,
                         *ldc);
     update_c(cf, layout, upper_lower, *n, *n, *ldc, c);
-    oneapi::mkl::aligned_free(cf);
+    oneapi::math::aligned_free(cf);
 }
 
 template <>
@@ -1760,12 +1760,12 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa,
     int sizec;
     sizec = *ldc * *n;
     std::complex<float>* cf =
-        (std::complex<float>*)oneapi::mkl::aligned_alloc(64, sizeof(std::complex<float>) * sizec);
+        (std::complex<float>*)oneapi::math::aligned_alloc(64, sizeof(std::complex<float>) * sizec);
     update_c(c, layout, upper_lower, *n, *n, *ldc, cf);
     cblas_cgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf,
                         *ldc);
     update_c(cf, layout, upper_lower, *n, *n, *ldc, c);
-    oneapi::mkl::aligned_free(cf);
+    oneapi::math::aligned_free(cf);
 }
 
 template <>
@@ -1777,13 +1777,13 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa,
     // Not supported in NETLIB. ZGEMM is used as reference.
     int sizec;
     sizec = *ldc * *n;
-    std::complex<double>* cf =
-        (std::complex<double>*)oneapi::mkl::aligned_alloc(64, sizeof(std::complex<double>) * sizec);
+    std::complex<double>* cf = (std::complex<double>*)oneapi::math::aligned_alloc(
+        64, sizeof(std::complex<double>) * sizec);
     update_c(c, layout, upper_lower, *n, *n, *ldc, cf);
     cblas_zgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf,
                         *ldc);
     update_c(cf, layout, upper_lower, *n, *n, *ldc, c);
-    oneapi::mkl::aligned_free(cf);
+    oneapi::math::aligned_free(cf);
 }
 
 template <typename fp>
@@ -1974,10 +1974,10 @@ fp sametype_conj(fp x) {
 }
 
 template <typename fp>
-void omatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int64_t m, int64_t n,
+void omatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, int64_t m, int64_t n,
                   fp alpha, fp* A, int64_t lda, fp* B, int64_t ldb) {
     int64_t logical_m, logical_n;
-    if (layout == oneapi::mkl::layout::col_major) {
+    if (layout == oneapi::math::layout::col_major) {
         logical_m = m;
         logical_n = n;
     }
@@ -1985,14 +1985,14 @@ void omatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6
         logical_m = n;
         logical_n = m;
     }
-    if (trans == oneapi::mkl::transpose::nontrans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 B[j * ldb + i] = alpha * A[j * lda + i];
             }
         }
     }
-    else if (trans == oneapi::mkl::transpose::trans) {
+    else if (trans == oneapi::math::transpose::trans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 B[i * ldb + j] = alpha * A[j * lda + i];
@@ -2010,12 +2010,12 @@ void omatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6
 }
 
 template <typename fp>
-void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, const int64_t& m,
+void omatcopy2_ref(oneapi::math::layout layout, oneapi::math::transpose trans, const int64_t& m,
                    const int64_t& n, const fp& alpha, const fp* in_matrix, const int64_t& ld_in,
                    const int64_t& inc_in, fp* out_matrix, const int64_t& ld_out,
                    const int64_t inc_out) {
     int64_t logical_m, logical_n;
-    if (layout == oneapi::mkl::layout::col_major) {
+    if (layout == oneapi::math::layout::col_major) {
         logical_m = m;
         logical_n = n;
     }
@@ -2023,7 +2023,7 @@ void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, con
         logical_m = n;
         logical_n = m;
     }
-    if (trans == oneapi::mkl::transpose::trans) {
+    if (trans == oneapi::math::transpose::trans) {
         for (int64_t i = 0; i < logical_m; ++i) {
             for (int64_t j = 0; j < logical_n; ++j) {
                 {
@@ -2033,7 +2033,7 @@ void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, con
             }
         }
     }
-    else if (trans == oneapi::mkl::transpose::nontrans) {
+    else if (trans == oneapi::math::transpose::nontrans) {
         for (int i = 0; i < logical_n; ++i) {
             for (int j = 0; j < logical_m; ++j) {
                 {
@@ -2056,10 +2056,10 @@ void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, con
 }
 
 template <typename fp>
-void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int64_t m, int64_t n,
+void imatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, int64_t m, int64_t n,
                   fp alpha, fp* A, int64_t lda, int64_t ldb) {
     int64_t logical_m, logical_n;
-    if (layout == oneapi::mkl::layout::col_major) {
+    if (layout == oneapi::math::layout::col_major) {
         logical_m = m;
         logical_n = n;
     }
@@ -2068,16 +2068,16 @@ void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6
         logical_n = m;
     }
     std::vector<fp> temp(m * n);
-    int64_t ld_temp = (trans == oneapi::mkl::transpose::nontrans ? logical_m : logical_n);
+    int64_t ld_temp = (trans == oneapi::math::transpose::nontrans ? logical_m : logical_n);
 
-    if (trans == oneapi::mkl::transpose::nontrans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 temp[j * ld_temp + i] = alpha * A[j * lda + i];
             }
         }
     }
-    else if (trans == oneapi::mkl::transpose::trans) {
+    else if (trans == oneapi::math::transpose::trans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 temp[i * ld_temp + j] = alpha * A[j * lda + i];
@@ -2093,7 +2093,7 @@ void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6
         }
     }
 
-    if (trans == oneapi::mkl::transpose::nontrans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 A[j * ldb + i] = temp[j * ld_temp + i];
@@ -2110,11 +2110,11 @@ void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6
 }
 
 template <typename fp>
-void omatadd_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
-                 oneapi::mkl::transpose transb, int64_t m, int64_t n, fp alpha, fp* A, int64_t lda,
+void omatadd_ref(oneapi::math::layout layout, oneapi::math::transpose transa,
+                 oneapi::math::transpose transb, int64_t m, int64_t n, fp alpha, fp* A, int64_t lda,
                  fp beta, fp* B, int64_t ldb, fp* C, int64_t ldc) {
     int64_t logical_m, logical_n;
-    if (layout == oneapi::mkl::layout::col_major) {
+    if (layout == oneapi::math::layout::col_major) {
         logical_m = m;
         logical_n = n;
     }
@@ -2129,14 +2129,14 @@ void omatadd_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         }
     }
 
-    if (transa == oneapi::mkl::transpose::nontrans) {
+    if (transa == oneapi::math::transpose::nontrans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 C[j * ldc + i] += alpha * A[j * lda + i];
             }
         }
     }
-    else if (transa == oneapi::mkl::transpose::trans) {
+    else if (transa == oneapi::math::transpose::trans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 C[j * ldc + i] += alpha * A[i * lda + j];
@@ -2152,14 +2152,14 @@ void omatadd_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         }
     }
 
-    if (transb == oneapi::mkl::transpose::nontrans) {
+    if (transb == oneapi::math::transpose::nontrans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 C[j * ldc + i] += beta * B[j * ldb + i];
             }
         }
     }
-    else if (transb == oneapi::mkl::transpose::trans) {
+    else if (transb == oneapi::math::transpose::trans) {
         for (int64_t j = 0; j < logical_n; j++) {
             for (int64_t i = 0; i < logical_m; i++) {
                 C[j * ldc + i] += beta * B[i * ldb + j];
diff --git a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp
index bac1a76a8..d00d20947 100644
--- a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp
+++ b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp
@@ -20,8 +20,8 @@
 #ifndef _REFERENCE_BLAS_WRAPPERS_HPP__
 #define _REFERENCE_BLAS_WRAPPERS_HPP__
 
-#include "oneapi/mkl/exceptions.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math/detail/exceptions.hpp"
+#include "oneapi/math/detail/config.hpp"
 #include <string>
 #include "cblas.h"
 
@@ -41,11 +41,11 @@ extern "C" {
 static LIB_TYPE h_libblas = NULL;
 static LIB_TYPE blas_library() {
     if (h_libblas == NULL) {
-        h_libblas = GET_LIB_HANDLE(ONEMKL_REF_BLAS_LIBNAME);
+        h_libblas = GET_LIB_HANDLE(ONEMATH_REF_BLAS_LIBNAME);
         if (h_libblas == NULL) {
-            throw oneapi::mkl::library_not_found(
+            throw oneapi::math::library_not_found(
                 "BLAS", "blas_library()",
-                std::string("failed to load BLAS library ") + ONEMKL_REF_BLAS_LIBNAME);
+                std::string("failed to load BLAS library ") + ONEMATH_REF_BLAS_LIBNAME);
         }
     }
     return h_libblas;
@@ -54,11 +54,11 @@ static LIB_TYPE blas_library() {
 static LIB_TYPE h_libcblas = NULL;
 static LIB_TYPE cblas_library() {
     if (h_libcblas == NULL) {
-        h_libcblas = GET_LIB_HANDLE(ONEMKL_REF_CBLAS_LIBNAME);
+        h_libcblas = GET_LIB_HANDLE(ONEMATH_REF_CBLAS_LIBNAME);
         if (h_libcblas == NULL) {
-            throw oneapi::mkl::library_not_found(
+            throw oneapi::math::library_not_found(
                 "BLAS", "cblas_library()",
-                std::string("failed to load CBLAS library ") + ONEMKL_REF_CBLAS_LIBNAME);
+                std::string("failed to load CBLAS library ") + ONEMATH_REF_CBLAS_LIBNAME);
         }
     }
     return h_libcblas;
diff --git a/tests/unit_tests/blas/include/test_common.hpp b/tests/unit_tests/blas/include/test_common.hpp
index 0b64d3acc..64df0bd76 100644
--- a/tests/unit_tests/blas/include/test_common.hpp
+++ b/tests/unit_tests/blas/include/test_common.hpp
@@ -67,21 +67,22 @@ constexpr int num_components() {
 
 // Matrix helpers.
 template <typename T>
-constexpr T inner_dimension(oneapi::mkl::transpose trans, T m, T n) {
-    return (trans == oneapi::mkl::transpose::nontrans) ? m : n;
+constexpr T inner_dimension(oneapi::math::transpose trans, T m, T n) {
+    return (trans == oneapi::math::transpose::nontrans) ? m : n;
 }
 template <typename T>
-constexpr T outer_dimension(oneapi::mkl::transpose trans, T m, T n) {
-    return (trans == oneapi::mkl::transpose::nontrans) ? n : m;
+constexpr T outer_dimension(oneapi::math::transpose trans, T m, T n) {
+    return (trans == oneapi::math::transpose::nontrans) ? n : m;
 }
 template <typename T>
-constexpr T matrix_size(oneapi::mkl::transpose trans, T m, T n, T ldm) {
+constexpr T matrix_size(oneapi::math::transpose trans, T m, T n, T ldm) {
     return outer_dimension(trans, m, n) * ldm;
 }
 template <typename T>
-constexpr T matrix_size(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, T m, T n, T ldm) {
-    return (layout == oneapi::mkl::layout::col_major) ? outer_dimension(trans, m, n) * ldm
-                                                      : inner_dimension(trans, m, n) * ldm;
+constexpr T matrix_size(oneapi::math::layout layout, oneapi::math::transpose trans, T m, T n,
+                        T ldm) {
+    return (layout == oneapi::math::layout::col_major) ? outer_dimension(trans, m, n) * ldm
+                                                       : inner_dimension(trans, m, n) * ldm;
 }
 
 // SYCL buffer creation helper.
@@ -192,28 +193,28 @@ void rand_vector(vec& v, int n, int inc) {
 }
 
 template <typename fp>
-oneapi::mkl::transpose rand_trans() {
+oneapi::math::transpose rand_trans() {
     std::int64_t tmp;
-    oneapi::mkl::transpose trans;
+    oneapi::math::transpose trans;
     if ((std::is_same<fp, float>::value) || (std::is_same<fp, double>::value)) {
-        trans = (oneapi::mkl::transpose)(std::rand() % 2);
+        trans = (oneapi::math::transpose)(std::rand() % 2);
     }
     else {
         tmp = std::rand() % 3;
         if (tmp == 2)
-            trans = oneapi::mkl::transpose::conjtrans;
+            trans = oneapi::math::transpose::conjtrans;
         else
-            trans = (oneapi::mkl::transpose)tmp;
+            trans = (oneapi::math::transpose)tmp;
     }
     return trans;
 }
 
 template <typename vec>
-void print_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld, char* name) {
+void print_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld, char* name) {
     std::cout << "Matrix " << name << ":\n";
     for (int i = 0; i < m; i++) {
         for (int j = 0; j < n; j++) {
-            if (trans == oneapi::mkl::transpose::nontrans)
+            if (trans == oneapi::math::transpose::nontrans)
                 std::cout << (double)M[i + j * ld] << " ";
             else
                 std::cout << (double)M[j + i * ld] << " ";
@@ -230,14 +231,14 @@ void copy_vector(fp* src, int n, int inc, fp* dest) {
 }
 
 template <typename vec_src, typename vec_dest>
-void copy_matrix(vec_src& src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m,
+void copy_matrix(vec_src& src, oneapi::math::layout layout, oneapi::math::transpose trans, int m,
                  int n, int ld, vec_dest& dest) {
     using T_data = typename vec_dest::value_type;
     dest.resize(matrix_size(layout, trans, m, n, ld));
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 dest[i + j * ld] = (T_data)src[i + j * ld];
@@ -250,12 +251,12 @@ void copy_matrix(vec_src& src, oneapi::mkl::layout layout, oneapi::mkl::transpos
 }
 
 template <typename fp_src, typename fp_dst>
-void copy_matrix(fp_src* src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m,
+void copy_matrix(fp_src* src, oneapi::math::layout layout, oneapi::math::transpose trans, int m,
                  int n, int ld, fp_dst* dest) {
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 dest[i + j * ld] = (fp_dst)src[i + j * ld];
@@ -268,12 +269,12 @@ void copy_matrix(fp_src* src, oneapi::mkl::layout layout, oneapi::mkl::transpose
 }
 
 template <typename vec>
-void rand_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld) {
+void rand_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld) {
     using fp = typename vec::value_type;
 
     M.resize(matrix_size(trans, m, n, ld));
 
-    if (trans == oneapi::mkl::transpose::nontrans) {
+    if (trans == oneapi::math::transpose::nontrans) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 M[i + j * ld] = rand_scalar<fp>();
@@ -286,16 +287,16 @@ void rand_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld) {
 }
 
 template <typename vec>
-void rand_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n,
+void rand_matrix(vec& M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n,
                  int ld) {
     using fp = typename vec::value_type;
 
     M.resize(matrix_size(layout, trans, m, n, ld));
 
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 M[i + j * ld] = rand_scalar<fp>();
@@ -308,12 +309,12 @@ void rand_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose tran
 }
 
 template <typename fp>
-void rand_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n,
+void rand_matrix(fp* M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n,
                  int ld) {
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++)
                 M[i + j * ld] = rand_scalar<fp>();
@@ -326,16 +327,16 @@ void rand_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans
 }
 
 template <typename vec>
-void rand_trsm_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m,
+void rand_trsm_matrix(vec& M, oneapi::math::layout layout, oneapi::math::transpose trans, int m,
                       int n, int ld) {
     using fp = typename vec::value_type;
 
     M.resize(matrix_size(layout, trans, m, n, ld));
 
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++) {
                 if (i == j)
@@ -356,12 +357,12 @@ void rand_trsm_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose
 }
 
 template <typename fp>
-void rand_trsm_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n,
-                      int ld) {
-    if (((trans == oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::col_major)) ||
-        ((trans != oneapi::mkl::transpose::nontrans) &&
-         (layout == oneapi::mkl::layout::row_major))) {
+void rand_trsm_matrix(fp* M, oneapi::math::layout layout, oneapi::math::transpose trans, int m,
+                      int n, int ld) {
+    if (((trans == oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::col_major)) ||
+        ((trans != oneapi::math::transpose::nontrans) &&
+         (layout == oneapi::math::layout::row_major))) {
         for (int j = 0; j < n; j++)
             for (int i = 0; i < m; i++) {
                 if (i == j)
@@ -382,8 +383,8 @@ void rand_trsm_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose
 }
 
 template <typename vec>
-void rand_tpsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-                      oneapi::mkl::transpose trans, int m) {
+void rand_tpsv_matrix(vec& M, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+                      oneapi::math::transpose trans, int m) {
     using fp = typename vec::value_type;
     std::vector<fp> tmp;
     int start, end, i, j, k = 0;
@@ -392,13 +393,13 @@ void rand_tpsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo uppe
     M.resize((m * (m + 1)) / 2);
 
     for (j = 0; j < m; j++) {
-        if (layout == oneapi::mkl::layout::col_major) {
-            start = (upper_lower == oneapi::mkl::uplo::U) ? 0 : j;
-            end = (upper_lower == oneapi::mkl::uplo::U) ? j : m - 1;
+        if (layout == oneapi::math::layout::col_major) {
+            start = (upper_lower == oneapi::math::uplo::U) ? 0 : j;
+            end = (upper_lower == oneapi::math::uplo::U) ? j : m - 1;
         }
         else {
-            start = (upper_lower == oneapi::mkl::uplo::U) ? j : 0;
-            end = (upper_lower == oneapi::mkl::uplo::U) ? m - 1 : j;
+            start = (upper_lower == oneapi::math::uplo::U) ? j : 0;
+            end = (upper_lower == oneapi::math::uplo::U) ? m - 1 : j;
         }
         for (i = start; i <= end; i++) {
             M[k] = tmp[i + j * m];
@@ -408,8 +409,8 @@ void rand_tpsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo uppe
 }
 
 template <typename vec>
-void rand_tbsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-                      oneapi::mkl::transpose trans, int m, int k, int ld) {
+void rand_tbsv_matrix(vec& M, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+                      oneapi::math::transpose trans, int m, int k, int ld) {
     using fp = typename vec::value_type;
     std::vector<fp> tmp;
     int i, j, n;
@@ -417,8 +418,8 @@ void rand_tbsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo uppe
     rand_trsm_matrix(tmp, layout, trans, m, m, ld);
     M.resize(matrix_size(layout, trans, m, m, ld));
 
-    if (((layout == oneapi::mkl::layout::col_major) && (upper_lower == oneapi::mkl::uplo::U)) ||
-        ((layout == oneapi::mkl::layout::row_major) && (upper_lower == oneapi::mkl::uplo::L))) {
+    if (((layout == oneapi::math::layout::col_major) && (upper_lower == oneapi::math::uplo::U)) ||
+        ((layout == oneapi::math::layout::row_major) && (upper_lower == oneapi::math::uplo::L))) {
         for (j = 0; j < m; j++) {
             n = k - j;
             for (i = std::max(0, j - k); i <= j; i++) {
@@ -564,13 +565,13 @@ bool check_equal_trsv_vector(vec1& v, vec2& v_ref, int n, int inc, int error_mag
 }
 
 template <typename acc1, typename acc2>
-bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, int m, int n, int ld,
+bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout, int m, int n, int ld,
                         int error_mag, std::ostream& out) {
     bool good = true;
     int idx, count = 0;
     for (int j = 0; j < n; j++) {
         for (int i = 0; i < m; i++) {
-            idx = (layout == oneapi::mkl::layout::col_major) ? i + j * ld : j + i * ld;
+            idx = (layout == oneapi::math::layout::col_major) ? i + j * ld : j + i * ld;
             if (!check_equal(M[idx], M_ref[idx], error_mag)) {
                 out << "Difference in entry (" << i << ',' << j << "): DPC++ " << M[idx]
                     << " vs. Reference " << M_ref[idx] << std::endl;
@@ -586,13 +587,13 @@ bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, int m,
 }
 
 template <typename fp>
-bool check_equal_matrix(const fp* M, const fp* M_ref, oneapi::mkl::layout layout, int m, int n,
+bool check_equal_matrix(const fp* M, const fp* M_ref, oneapi::math::layout layout, int m, int n,
                         int ld, int error_mag, std::ostream& out) {
     bool good = true;
     int idx, count = 0;
     for (int j = 0; j < n; j++) {
         for (int i = 0; i < m; i++) {
-            idx = (layout == oneapi::mkl::layout::col_major) ? i + j * ld : j + i * ld;
+            idx = (layout == oneapi::math::layout::col_major) ? i + j * ld : j + i * ld;
             if (!check_equal(M[idx], M_ref[idx], error_mag)) {
                 out << "Difference in entry (" << i << ',' << j << "): DPC++ " << M[idx]
                     << " vs. Reference " << M_ref[idx] << std::endl;
@@ -608,16 +609,16 @@ bool check_equal_matrix(const fp* M, const fp* M_ref, oneapi::mkl::layout layout
 }
 
 template <typename acc1, typename acc2>
-bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout,
-                        oneapi::mkl::uplo upper_lower, int m, int n, int ld, int error_mag,
+bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout,
+                        oneapi::math::uplo upper_lower, int m, int n, int ld, int error_mag,
                         std::ostream& out) {
     bool good = true;
     int idx, count = 0;
     for (int j = 0; j < n; j++) {
         for (int i = 0; i < m; i++) {
-            idx = (layout == oneapi::mkl::layout::col_major) ? i + j * ld : j + i * ld;
-            if (((upper_lower == oneapi::mkl::uplo::upper) && (j >= i)) ||
-                ((upper_lower == oneapi::mkl::uplo::lower) && (j <= i))) {
+            idx = (layout == oneapi::math::layout::col_major) ? i + j * ld : j + i * ld;
+            if (((upper_lower == oneapi::math::uplo::upper) && (j >= i)) ||
+                ((upper_lower == oneapi::math::uplo::lower) && (j <= i))) {
                 if (!check_equal(M[idx], M_ref[idx], error_mag)) {
                     out << "Difference in entry (" << i << ',' << j << "): DPC++ " << M[idx]
                         << " vs. Reference " << M_ref[idx] << std::endl;
@@ -634,13 +635,13 @@ bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout,
 }
 
 template <typename acc1, typename acc2>
-bool check_equal_trsm_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, int m, int n, int ld,
-                             int error_mag, std::ostream& out) {
+bool check_equal_trsm_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout, int m, int n,
+                             int ld, int error_mag, std::ostream& out) {
     bool good = true;
     int idx, count = 0;
     for (int j = 0; j < n; j++) {
         for (int i = 0; i < m; i++) {
-            idx = (layout == oneapi::mkl::layout::col_major) ? i + j * ld : j + i * ld;
+            idx = (layout == oneapi::math::layout::col_major) ? i + j * ld : j + i * ld;
             if (!check_equal_trsm(M[idx], M_ref[idx], error_mag)) {
                 out << "Difference in entry (" << i << ',' << j << "): DPC++ " << M[idx]
                     << " vs. Reference " << M_ref[idx] << std::endl;
@@ -677,14 +678,14 @@ typename std::enable_if<std::is_integral<fp>::value, bool>::type check_almost_eq
 }
 
 template <typename Ta, typename Tb>
-bool check_almost_equal_matrix_int(Ta& M, Tb& M_ref, oneapi::mkl::layout layout, int m, int n,
+bool check_almost_equal_matrix_int(Ta& M, Tb& M_ref, oneapi::math::layout layout, int m, int n,
                                    int ld, int error_mag, std::ostream& out) {
     static_assert(is_matrix_type_integral<Ta>() && is_matrix_type_integral<Tb>());
     bool good = true;
     int idx, count = 0;
     for (int j = 0; j < n; j++) {
         for (int i = 0; i < m; i++) {
-            idx = (layout == oneapi::mkl::layout::col_major) ? i + j * ld : j + i * ld;
+            idx = (layout == oneapi::math::layout::col_major) ? i + j * ld : j + i * ld;
             if (!check_almost_equal_int(M[idx], M_ref[idx], error_mag)) {
                 out << "Difference in entry (" << i << ',' << j << "): DPC++ " << M[idx]
                     << " vs. Reference " << M_ref[idx] << std::endl;
@@ -700,7 +701,7 @@ bool check_almost_equal_matrix_int(Ta& M, Tb& M_ref, oneapi::mkl::layout layout,
 }
 
 template <typename Ta, typename Tb>
-bool check_almost_equal_matrix(Ta& M, Tb& M_ref, oneapi::mkl::layout layout, int m, int n, int ld,
+bool check_almost_equal_matrix(Ta& M, Tb& M_ref, oneapi::math::layout layout, int m, int n, int ld,
                                int error_mag, std::ostream& out) {
     // Only call if returned dtype is integral
     if constexpr (is_matrix_type_integral<Ta>() && is_matrix_type_integral<Tb>())
diff --git a/tests/unit_tests/blas/level1/CMakeLists.txt b/tests/unit_tests/blas/level1/CMakeLists.txt
index 6a67ef2e8..21d70657e 100644
--- a/tests/unit_tests/blas/level1/CMakeLists.txt
+++ b/tests/unit_tests/blas/level1/CMakeLists.txt
@@ -41,7 +41,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET blas_level1_rt SOURCES ${L1_SOURCES})
   else()
-    target_link_libraries(blas_level1_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(blas_level1_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -58,5 +58,5 @@ target_include_directories(blas_level1_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET blas_level1_ct SOURCES ${L1_SOURCES})
 else()
-  target_link_libraries(blas_level1_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(blas_level1_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
diff --git a/tests/unit_tests/blas/level1/asum.cpp b/tests/unit_tests/blas/level1/asum.cpp
index 6969789e3..a2caef8d9 100644
--- a/tests/unit_tests/blas/level1/asum.cpp
+++ b/tests/unit_tests/blas/level1/asum.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res>
-int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
+int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) {
     // Prepare data.
     vector<fp> x;
     fp_res result = fp_res(-1), result_ref = fp_res(-1);
@@ -82,23 +82,24 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::asum(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::asum(main_queue, N, x_buffer, incx,
+                                                       result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::asum(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::asum(main_queue, N, x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::asum, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::asum, N,
                                         x_buffer, incx, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::asum, N, x_buffer,
-                                        incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N,
+                                        x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
@@ -109,7 +110,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -125,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
     return (int)good;
 }
 
-class AsumTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class AsumTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(AsumTests, RealSinglePrecision) {
@@ -170,8 +171,8 @@ TEST_P(AsumTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AsumTestSuite, AsumTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/asum_usm.cpp b/tests/unit_tests/blas/level1/asum_usm.cpp
index b42799abd..64be04afc 100644
--- a/tests/unit_tests/blas/level1/asum_usm.cpp
+++ b/tests/unit_tests/blas/level1/asum_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
+int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -83,10 +83,10 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
 
     fp_res* result_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        result_p = (fp_res*)oneapi::mkl::malloc_shared(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_shared(64, sizeof(fp_res), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        result_p = (fp_res*)oneapi::mkl::malloc_device(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_device(64, sizeof(fp_res), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -95,26 +95,26 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::asum(main_queue, N, x.data(), incx,
-                                                             result_p, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::asum(main_queue, N, x.data(), incx,
+                                                              result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::asum(main_queue, N, x.data(), incx, result_p,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::asum(main_queue, N, x.data(), incx, result_p,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::asum, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::asum, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::asum, N, x.data(),
-                                        incx, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N,
+                                        x.data(), incx, result_p, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -138,13 +138,13 @@ int test(device* dev, oneapi::mkl::layout layout, int64_t N, int64_t incx) {
 
     bool good = check_equal_ptr(main_queue, result_p, result_ref, N, std::cout);
 
-    oneapi::mkl::free_usm(result_p, cxt);
+    oneapi::math::free_usm(result_p, cxt);
 
     return (int)good;
 }
 
 class AsumUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AsumUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -196,8 +196,8 @@ TEST_P(AsumUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AsumUsmTestSuite, AsumUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/axpby.cpp b/tests/unit_tests/blas/level1/axpby.cpp
index 4234e5259..3b063404b 100644
--- a/tests/unit_tests/blas/level1/axpby.cpp
+++ b/tests/unit_tests/blas/level1/axpby.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) {
     // Prepare data.
     vector<fp> x, y, y_ref;
 
@@ -85,24 +85,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::axpby(main_queue, N, alpha, x_buffer, incx, beta,
-                                                       y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::axpby(main_queue, N, alpha, x_buffer, incx, beta,
+                                                        y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::axpby(main_queue, N, alpha, x_buffer, incx, beta,
-                                                    y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::axpby(main_queue, N, alpha, x_buffer, incx, beta,
+                                                     y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpby, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpby, N,
                                         alpha, x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpby, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpby, N, alpha,
                                         x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
@@ -114,7 +114,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -130,8 +130,8 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     return (int)good;
 }
 
-class AxpbyTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class AxpbyTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpbyTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -180,8 +180,8 @@ TEST_P(AxpbyTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpbyTestSuite, AxpbyTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/axpby_usm.cpp b/tests/unit_tests/blas/level1/axpby_usm.cpp
index 1459f1900..3bd8b4a71 100644
--- a/tests/unit_tests/blas/level1/axpby_usm.cpp
+++ b/tests/unit_tests/blas/level1/axpby_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -87,25 +87,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::axpby(main_queue, N, alpha, x.data(), incx,
-                                                              beta, y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::axpby(main_queue, N, alpha, x.data(), incx,
+                                                               beta, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::axpby(main_queue, N, alpha, x.data(), incx,
-                                                           beta, y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::axpby(main_queue, N, alpha, x.data(), incx,
+                                                            beta, y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpby, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpby, N,
                                         alpha, x.data(), incx, beta, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpby, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpby, N, alpha,
                                         x.data(), incx, beta, y.data(), incy, dependencies);
                 break;
             default: break;
@@ -118,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,7 +134,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
 }
 
 class AxpbyUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpbyUsmTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -183,8 +183,8 @@ TEST_P(AxpbyUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpbyUsmTestSuite, AxpbyUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/axpy.cpp b/tests/unit_tests/blas/level1/axpy.cpp
index a0fbdc4c6..5c6557de6 100644
--- a/tests/unit_tests/blas/level1/axpy.cpp
+++ b/tests/unit_tests/blas/level1/axpy.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) {
     // Prepare data.
     vector<fp> x, y, y_ref;
 
@@ -84,24 +84,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::axpy(main_queue, N, alpha, x_buffer, incx,
-                                                      y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::axpy(main_queue, N, alpha, x_buffer, incx,
+                                                       y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::axpy(main_queue, N, alpha, x_buffer, incx, y_buffer,
-                                                   incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::axpy(main_queue, N, alpha, x_buffer, incx, y_buffer,
+                                                    incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy, N, alpha,
-                                        x_buffer, incx, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N,
+                                        alpha, x_buffer, incx, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy, N, alpha,
                                         x_buffer, incx, y_buffer, incy);
                 break;
             default: break;
@@ -113,7 +113,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     return (int)good;
 }
 
-class AxpyTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class AxpyTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(AxpyTests, RealSinglePrecision) {
@@ -175,8 +175,8 @@ TEST_P(AxpyTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpyTestSuite, AxpyTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/axpy_usm.cpp b/tests/unit_tests/blas/level1/axpy_usm.cpp
index 651b70a58..737c44de3 100644
--- a/tests/unit_tests/blas/level1/axpy_usm.cpp
+++ b/tests/unit_tests/blas/level1/axpy_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -86,25 +86,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::axpy(main_queue, N, alpha, x.data(), incx,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::axpy(main_queue, N, alpha, x.data(), incx,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::axpy(main_queue, N, alpha, x.data(), incx,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::axpy(main_queue, N, alpha, x.data(), incx,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy, N, alpha,
-                                        x.data(), incx, y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N,
+                                        alpha, x.data(), incx, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy, N, alpha,
                                         x.data(), incx, y.data(), incy, dependencies);
                 break;
             default: break;
@@ -117,7 +117,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -133,7 +133,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
 }
 
 class AxpyUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(AxpyUsmTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -178,8 +178,8 @@ TEST_P(AxpyUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(AxpyUsmTestSuite, AxpyUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/copy.cpp b/tests/unit_tests/blas/level1/copy.cpp
index 87a1c2f1b..c4ed07a59 100644
--- a/tests/unit_tests/blas/level1/copy.cpp
+++ b/tests/unit_tests/blas/level1/copy.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y, y_ref;
 
@@ -84,24 +84,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::copy(main_queue, N, x_buffer, incx, y_buffer,
-                                                      incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::copy(main_queue, N, x_buffer, incx, y_buffer,
+                                                       incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::copy(main_queue, N, x_buffer, incx, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::copy(main_queue, N, x_buffer, incx, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy, N,
                                         x_buffer, incx, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy, N, x_buffer,
-                                        incx, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy, N,
+                                        x_buffer, incx, y_buffer, incy);
                 break;
             default: break;
         }
@@ -112,7 +112,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -128,7 +128,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     return (int)good;
 }
 
-class CopyTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class CopyTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(CopyTests, RealSinglePrecision) {
@@ -164,8 +164,8 @@ TEST_P(CopyTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(CopyTestSuite, CopyTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/copy_usm.cpp b/tests/unit_tests/blas/level1/copy_usm.cpp
index 0f491015b..aa2d59c2a 100644
--- a/tests/unit_tests/blas/level1/copy_usm.cpp
+++ b/tests/unit_tests/blas/level1/copy_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -86,26 +86,26 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::copy(main_queue, N, x.data(), incx,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::copy(main_queue, N, x.data(), incx,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::copy(main_queue, N, x.data(), incx, y.data(),
-                                                          incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::copy(main_queue, N, x.data(), incx, y.data(),
+                                                           incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy, N,
                                         x.data(), incx, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy, N, x.data(),
-                                        incx, y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy, N,
+                                        x.data(), incx, y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -117,7 +117,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -133,7 +133,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 }
 
 class CopyUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(CopyUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2, 3));
@@ -168,8 +168,8 @@ TEST_P(CopyUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(CopyUsmTestSuite, CopyUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dot.cpp b/tests/unit_tests/blas/level1/dot.cpp
index 11cb09bcc..055a2b472 100644
--- a/tests/unit_tests/blas/level1/dot.cpp
+++ b/tests/unit_tests/blas/level1/dot.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y;
     fp_res result = fp_res(-1), result_ref = fp_res(-1);
@@ -84,24 +84,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                     result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                      result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                  result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                   result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dot, N,
                                         x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dot, N, x_buffer,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dot, N, x_buffer,
                                         incx, y_buffer, incy, result_buffer);
                 break;
             default: break;
@@ -113,7 +113,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -129,7 +129,8 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     return (int)good;
 }
 
-class DotTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class DotTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(DotTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -161,8 +162,8 @@ TEST_P(DotTests, RealDoubleSinglePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotTestSuite, DotTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dot_usm.cpp b/tests/unit_tests/blas/level1/dot_usm.cpp
index b8780c75d..de775c4bf 100644
--- a/tests/unit_tests/blas/level1/dot_usm.cpp
+++ b/tests/unit_tests/blas/level1/dot_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -83,10 +83,10 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 
     fp_res* result_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        result_p = (fp_res*)oneapi::mkl::malloc_shared(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_shared(64, sizeof(fp_res), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        result_p = (fp_res*)oneapi::mkl::malloc_device(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_device(64, sizeof(fp_res), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -95,25 +95,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::dot(main_queue, N, x.data(), incx, y.data(),
-                                                            incy, result_p, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::dot(
+                    main_queue, N, x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::dot(main_queue, N, x.data(), incx, y.data(),
-                                                         incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::dot(main_queue, N, x.data(), incx, y.data(),
+                                                          incy, result_p, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dot, N,
                                         x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dot, N, x.data(),
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dot, N, x.data(),
                                         incx, y.data(), incy, result_p, dependencies);
                 break;
             default: break;
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,13 +137,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     // Compare the results of reference implementation and DPC++ implementation.
     bool good = check_equal_ptr(main_queue, result_p, result_ref, N, std::cout);
 
-    oneapi::mkl::free_usm(result_p, cxt);
+    oneapi::math::free_usm(result_p, cxt);
 
     return (int)good;
 }
 
 class DotUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DotUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -181,8 +181,8 @@ TEST_P(DotUsmTests, RealDoubleSinglePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotUsmTestSuite, DotUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dotc.cpp b/tests/unit_tests/blas/level1/dotc.cpp
index f420a5e9f..d6f6cafc2 100644
--- a/tests/unit_tests/blas/level1/dotc.cpp
+++ b/tests/unit_tests/blas/level1/dotc.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y;
     fp result = 0.0, result_reference = 0.0;
@@ -86,25 +86,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::dotc(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                      result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::dotc(main_queue, N, x_buffer, incx, y_buffer,
+                                                       incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::dotc(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                   result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::dotc(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                    result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dotc, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dotc, N,
                                         x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dotc, N, x_buffer,
-                                        incx, y_buffer, incy, result_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N,
+                                        x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
             default: break;
         }
@@ -115,7 +115,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,7 +131,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     return (int)good;
 }
 
-class DotcTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class DotcTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(DotcTests, ComplexSinglePrecision) {
@@ -155,8 +155,8 @@ TEST_P(DotcTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotcTestSuite, DotcTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dotc_usm.cpp b/tests/unit_tests/blas/level1/dotc_usm.cpp
index 9c08125f3..14d65968f 100644
--- a/tests/unit_tests/blas/level1/dotc_usm.cpp
+++ b/tests/unit_tests/blas/level1/dotc_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -83,31 +83,31 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 
     // Call DPC++ DOTC.
 
-    auto result_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
+    auto result_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::dotc(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::dotc(
                     main_queue, N, x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::dotc(main_queue, N, x.data(), incx, y.data(),
-                                                          incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::dotc(main_queue, N, x.data(), incx, y.data(),
+                                                           incy, result_p, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dotc, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dotc, N,
                                         x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dotc, N, x.data(),
-                                        incx, y.data(), incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N,
+                                        x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
             default: break;
         }
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,13 +131,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 
     bool good = check_equal(*result_p, result_reference, N, std::cout);
 
-    oneapi::mkl::free_shared(result_p, cxt);
+    oneapi::math::free_shared(result_p, cxt);
 
     return (int)good;
 }
 
 class DotcUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DotcUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -160,8 +160,8 @@ TEST_P(DotcUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotcUsmTestSuite, DotcUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dotu.cpp b/tests/unit_tests/blas/level1/dotu.cpp
index b6b3dd536..fd70e4fa1 100644
--- a/tests/unit_tests/blas/level1/dotu.cpp
+++ b/tests/unit_tests/blas/level1/dotu.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y;
     fp result = 0.0, result_reference = 0.0;
@@ -86,25 +86,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::dotu(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                      result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::dotu(main_queue, N, x_buffer, incx, y_buffer,
+                                                       incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::dotu(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                   result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::dotu(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                    result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dotu, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dotu, N,
                                         x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dotu, N, x_buffer,
-                                        incx, y_buffer, incy, result_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotu, N,
+                                        x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
             default: break;
         }
@@ -115,7 +115,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,7 +131,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     return (int)good;
 }
 
-class DotuTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class DotuTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(DotuTests, ComplexSinglePrecision) {
@@ -155,8 +155,8 @@ TEST_P(DotuTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotuTestSuite, DotuTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/dotu_usm.cpp b/tests/unit_tests/blas/level1/dotu_usm.cpp
index 6f7c4a63f..e1a34c5d2 100644
--- a/tests/unit_tests/blas/level1/dotu_usm.cpp
+++ b/tests/unit_tests/blas/level1/dotu_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -83,31 +83,31 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 
     // Call DPC++ DOTU.
 
-    auto result_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
+    auto result_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::dotu(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::dotu(
                     main_queue, N, x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::dotu(main_queue, N, x.data(), incx, y.data(),
-                                                          incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::dotu(main_queue, N, x.data(), incx, y.data(),
+                                                           incy, result_p, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dotu, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dotu, N,
                                         x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dotu, N, x.data(),
-                                        incx, y.data(), incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotu, N,
+                                        x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
             default: break;
         }
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -130,13 +130,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     bool good = check_equal(*result_p, result_reference, N, std::cout);
-    oneapi::mkl::free_shared(result_p, cxt);
+    oneapi::math::free_shared(result_p, cxt);
 
     return (int)good;
 }
 
 class DotuUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(DotuUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -159,8 +159,8 @@ TEST_P(DotuUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(DotuUsmTestSuite, DotuUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/iamax.cpp b/tests/unit_tests/blas/level1/iamax.cpp
index 977f12b5d..e56f58c42 100644
--- a/tests/unit_tests/blas/level1/iamax.cpp
+++ b/tests/unit_tests/blas/level1/iamax.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Prepare data.
     vector<fp> x;
     int64_t result = -1, result_ref = -1;
@@ -82,23 +82,23 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::iamax(main_queue, N, x_buffer, incx,
-                                                       result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::iamax(main_queue, N, x_buffer, incx,
+                                                        result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::iamax(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::iamax(main_queue, N, x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::iamax, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::iamax, N,
                                         x_buffer, incx, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::iamax, N,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::iamax, N,
                                         x_buffer, incx, result_buffer);
                 break;
             default: break;
@@ -110,7 +110,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -126,8 +126,8 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     return (int)good;
 }
 
-class IamaxTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class IamaxTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(IamaxTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2));
@@ -162,8 +162,8 @@ TEST_P(IamaxTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(IamaxTestSuite, IamaxTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/iamax_usm.cpp b/tests/unit_tests/blas/level1/iamax_usm.cpp
index 405a79532..a55127fa2 100644
--- a/tests/unit_tests/blas/level1/iamax_usm.cpp
+++ b/tests/unit_tests/blas/level1/iamax_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -82,10 +82,10 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
 
     int64_t* result_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        result_p = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t), *dev, cxt);
+        result_p = (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        result_p = (int64_t*)oneapi::mkl::malloc_device(64, sizeof(int64_t), *dev, cxt);
+        result_p = (int64_t*)oneapi::math::malloc_device(64, sizeof(int64_t), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -94,25 +94,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::iamax(main_queue, N, x.data(), incx,
-                                                              result_p, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::iamax(main_queue, N, x.data(), incx,
+                                                               result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::iamax(main_queue, N, x.data(), incx, result_p,
-                                                           dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::iamax(main_queue, N, x.data(), incx, result_p,
+                                                            dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::iamax, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::iamax, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::iamax, N,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::iamax, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
             default: break;
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,13 +136,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     bool good = check_equal_ptr(main_queue, result_p, result_ref, 0, std::cout);
-    oneapi::mkl::free_usm(result_p, cxt);
+    oneapi::math::free_usm(result_p, cxt);
 
     return (int)good;
 }
 
 class IamaxUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(IamaxUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2));
@@ -185,8 +185,8 @@ TEST_P(IamaxUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(IamaxUsmTestSuite, IamaxUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/iamin.cpp b/tests/unit_tests/blas/level1/iamin.cpp
index a52862cb6..d0673a2cd 100644
--- a/tests/unit_tests/blas/level1/iamin.cpp
+++ b/tests/unit_tests/blas/level1/iamin.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Prepare data.
     vector<fp> x;
     int64_t result = -1, result_ref = -1;
@@ -82,23 +82,23 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::iamin(main_queue, N, x_buffer, incx,
-                                                       result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::iamin(main_queue, N, x_buffer, incx,
+                                                        result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::iamin(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::iamin(main_queue, N, x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::iamin, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::iamin, N,
                                         x_buffer, incx, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::iamin, N,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::iamin, N,
                                         x_buffer, incx, result_buffer);
                 break;
             default: break;
@@ -110,7 +110,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -126,8 +126,8 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     return (int)good;
 }
 
-class IaminTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class IaminTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(IaminTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2));
@@ -162,8 +162,8 @@ TEST_P(IaminTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(IaminTestSuite, IaminTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/iamin_usm.cpp b/tests/unit_tests/blas/level1/iamin_usm.cpp
index a3523c8e7..beb535595 100644
--- a/tests/unit_tests/blas/level1/iamin_usm.cpp
+++ b/tests/unit_tests/blas/level1/iamin_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -82,10 +82,10 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
 
     int64_t* result_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        result_p = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t), *dev, cxt);
+        result_p = (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        result_p = (int64_t*)oneapi::mkl::malloc_device(64, sizeof(int64_t), *dev, cxt);
+        result_p = (int64_t*)oneapi::math::malloc_device(64, sizeof(int64_t), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -94,25 +94,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::iamin(main_queue, N, x.data(), incx,
-                                                              result_p, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::iamin(main_queue, N, x.data(), incx,
+                                                               result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::iamin(main_queue, N, x.data(), incx, result_p,
-                                                           dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::iamin(main_queue, N, x.data(), incx, result_p,
+                                                            dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::iamin, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::iamin, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::iamin, N,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::iamin, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
             default: break;
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,13 +136,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     bool good = check_equal_ptr(main_queue, result_p, result_ref, 0, std::cout);
-    oneapi::mkl::free_usm(result_p, cxt);
+    oneapi::math::free_usm(result_p, cxt);
 
     return (int)good;
 }
 
 class IaminUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(IaminUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2));
@@ -185,8 +185,8 @@ TEST_P(IaminUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(IaminUsmTestSuite, IaminUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/nrm2.cpp b/tests/unit_tests/blas/level1/nrm2.cpp
index 423cecb59..30ddd63fd 100644
--- a/tests/unit_tests/blas/level1/nrm2.cpp
+++ b/tests/unit_tests/blas/level1/nrm2.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Prepare data.
     vector<fp> x;
     fp_res result = fp_res(-1), result_ref = fp_res(-1);
@@ -83,23 +83,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::nrm2(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::nrm2(main_queue, N, x_buffer, incx,
+                                                       result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::nrm2(main_queue, N, x_buffer, incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::nrm2(main_queue, N, x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::nrm2, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::nrm2, N,
                                         x_buffer, incx, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::nrm2, N, x_buffer,
-                                        incx, result_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N,
+                                        x_buffer, incx, result_buffer);
                 break;
             default: break;
         }
@@ -110,7 +111,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -126,7 +127,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     return (int)good;
 }
 
-class Nrm2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class Nrm2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(Nrm2Tests, RealSinglePrecision) {
@@ -168,8 +169,8 @@ TEST_P(Nrm2Tests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(Nrm2TestSuite, Nrm2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/nrm2_usm.cpp b/tests/unit_tests/blas/level1/nrm2_usm.cpp
index 8628738f4..87470eb92 100644
--- a/tests/unit_tests/blas/level1/nrm2_usm.cpp
+++ b/tests/unit_tests/blas/level1/nrm2_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_res, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -83,10 +83,10 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
 
     fp_res* result_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        result_p = (fp_res*)oneapi::mkl::malloc_shared(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_shared(64, sizeof(fp_res), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        result_p = (fp_res*)oneapi::mkl::malloc_device(64, sizeof(fp_res), *dev, cxt);
+        result_p = (fp_res*)oneapi::math::malloc_device(64, sizeof(fp_res), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -95,26 +95,26 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::nrm2(main_queue, N, x.data(), incx,
-                                                             result_p, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::nrm2(main_queue, N, x.data(), incx,
+                                                              result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::nrm2(main_queue, N, x.data(), incx, result_p,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::nrm2(main_queue, N, x.data(), incx, result_p,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::nrm2, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::nrm2, N,
                                         x.data(), incx, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::nrm2, N, x.data(),
-                                        incx, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N,
+                                        x.data(), incx, result_p, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,13 +137,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx) {
     // Compare the results of reference implementation and DPC++ implementation.
 
     bool good = check_equal_ptr(main_queue, result_p, result_ref, N, std::cout);
-    oneapi::mkl::free_usm(result_p, cxt);
+    oneapi::math::free_usm(result_p, cxt);
 
     return (int)good;
 }
 
 class Nrm2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Nrm2UsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(
@@ -192,8 +192,8 @@ TEST_P(Nrm2UsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(Nrm2UsmTestSuite, Nrm2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rot.cpp b/tests/unit_tests/blas/level1/rot.cpp
index 12a26ce71..6508283b3 100644
--- a/tests/unit_tests/blas/level1/rot.cpp
+++ b/tests/unit_tests/blas/level1/rot.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c,
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c,
          fp_scalar s) {
     // Prepare data.
     vector<fp> x, x_ref, y, y_ref;
@@ -86,24 +86,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                     c, s);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                      c, s);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy, c,
-                                                  s);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy, c,
+                                                   s);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rot, N,
                                         x_buffer, incx, y_buffer, incy, c, s);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rot, N, x_buffer,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rot, N, x_buffer,
                                         incx, y_buffer, incy, c, s);
                 break;
             default: break;
@@ -115,7 +115,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,7 +135,8 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
     return (int)good;
 }
 
-class RotTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class RotTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(RotTests, RealSinglePrecision) {
     float c(2.0);
@@ -184,8 +185,8 @@ TEST_P(RotTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotTestSuite, RotTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rot_usm.cpp b/tests/unit_tests/blas/level1/rot_usm.cpp
index 6c19b0ceb..52bc8c2f5 100644
--- a/tests/unit_tests/blas/level1/rot_usm.cpp
+++ b/tests/unit_tests/blas/level1/rot_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c,
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c,
          fp_scalar s) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -88,25 +88,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::rot(main_queue, N, x.data(), incx, y.data(),
-                                                            incy, c, s, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::rot(main_queue, N, x.data(), incx,
+                                                             y.data(), incy, c, s, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::rot(main_queue, N, x.data(), incx, y.data(),
-                                                         incy, c, s, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::rot(main_queue, N, x.data(), incx, y.data(),
+                                                          incy, c, s, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rot, N,
                                         x.data(), incx, y.data(), incy, c, s, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rot, N, x.data(),
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rot, N, x.data(),
                                         incx, y.data(), incy, c, s, dependencies);
                 break;
             default: break;
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,7 +137,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_
 }
 
 class RotUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(RotUsmTests, RealSinglePrecision) {
     float c(2.0);
@@ -186,8 +186,8 @@ TEST_P(RotUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotUsmTestSuite, RotUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotg.cpp b/tests/unit_tests/blas/level1/rotg.cpp
index 4abcddd39..4ccb8eb30 100644
--- a/tests/unit_tests/blas/level1/rotg.cpp
+++ b/tests/unit_tests/blas/level1/rotg.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     fp a, b, s, a_ref, b_ref, s_ref;
     fp_scalar c, c_ref;
@@ -92,24 +92,24 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::rotg(main_queue, a_buffer, b_buffer, c_buffer,
-                                                      s_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::rotg(main_queue, a_buffer, b_buffer, c_buffer,
+                                                       s_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::rotg(main_queue, a_buffer, b_buffer, c_buffer,
-                                                   s_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::rotg(main_queue, a_buffer, b_buffer, c_buffer,
+                                                    s_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotg, a_buffer,
-                                        b_buffer, c_buffer, s_buffer);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotg,
+                                        a_buffer, b_buffer, c_buffer, s_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotg, a_buffer,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotg, a_buffer,
                                         b_buffer, c_buffer, s_buffer);
                 break;
             default: break;
@@ -121,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -144,7 +144,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
     return (int)good;
 }
 
-class RotgTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class RotgTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(RotgTests, RealSinglePrecision) {
@@ -180,8 +180,8 @@ TEST_P(RotgTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotgTestSuite, RotgTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotg_usm.cpp b/tests/unit_tests/blas/level1/rotg_usm.cpp
index d078ff03a..f198bb393 100644
--- a/tests/unit_tests/blas/level1/rotg_usm.cpp
+++ b/tests/unit_tests/blas/level1/rotg_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,16 +89,16 @@ int test(device* dev, oneapi::mkl::layout layout) {
     fp *a_p, *b_p, *s_p;
     fp_scalar* c_p;
     if constexpr (alloc_type == usm::alloc::shared) {
-        a_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
-        b_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
-        s_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
-        c_p = (fp_scalar*)oneapi::mkl::malloc_shared(64, sizeof(fp_scalar), *dev, cxt);
+        a_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
+        b_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
+        s_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
+        c_p = (fp_scalar*)oneapi::math::malloc_shared(64, sizeof(fp_scalar), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::device) {
-        a_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
-        b_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
-        s_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
-        c_p = (fp_scalar*)oneapi::mkl::malloc_device(64, sizeof(fp_scalar), *dev, cxt);
+        a_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
+        b_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
+        s_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
+        c_p = (fp_scalar*)oneapi::math::malloc_device(64, sizeof(fp_scalar), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -113,25 +113,25 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::rotg(main_queue, a_p, b_p, c_p, s_p,
-                                                             dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::rotg(main_queue, a_p, b_p, c_p, s_p,
+                                                              dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::rotg(main_queue, a_p, b_p, c_p, s_p,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::rotg(main_queue, a_p, b_p, c_p, s_p,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotg, a_p, b_p,
-                                        c_p, s_p, dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotg, a_p,
+                                        b_p, c_p, s_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotg, a_p, b_p,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotg, a_p, b_p,
                                         c_p, s_p, dependencies);
                 break;
             default: break;
@@ -144,7 +144,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -161,16 +161,16 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     bool good = good_a && good_b && good_c && good_s;
 
-    oneapi::mkl::free_usm(a_p, cxt);
-    oneapi::mkl::free_usm(b_p, cxt);
-    oneapi::mkl::free_usm(s_p, cxt);
-    oneapi::mkl::free_usm(c_p, cxt);
+    oneapi::math::free_usm(a_p, cxt);
+    oneapi::math::free_usm(b_p, cxt);
+    oneapi::math::free_usm(s_p, cxt);
+    oneapi::math::free_usm(c_p, cxt);
 
     return (int)good;
 }
 
 class RotgUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(RotgUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP((test<float, float>(std::get<0>(GetParam()), std::get<1>(GetParam()))));
@@ -201,8 +201,8 @@ TEST_P(RotgUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotgUsmTestSuite, RotgUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotm.cpp b/tests/unit_tests/blas/level1/rotm.cpp
index 4e4ba44ec..c9a38ff40 100644
--- a/tests/unit_tests/blas/level1/rotm.cpp
+++ b/tests/unit_tests/blas/level1/rotm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) {
     // Prepare data.
     vector<fp> x, x_ref, y, y_ref;
     vector<fp> param;
@@ -89,25 +89,25 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::rotm(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                      param_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::rotm(main_queue, N, x_buffer, incx, y_buffer,
+                                                       incy, param_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::rotm(main_queue, N, x_buffer, incx, y_buffer, incy,
-                                                   param_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::rotm(main_queue, N, x_buffer, incx, y_buffer, incy,
+                                                    param_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotm, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotm, N,
                                         x_buffer, incx, y_buffer, incy, param_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotm, N, x_buffer,
-                                        incx, y_buffer, incy, param_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N,
+                                        x_buffer, incx, y_buffer, incy, param_buffer);
                 break;
             default: break;
         }
@@ -118,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,7 +136,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     return (int)good;
 }
 
-class RotmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class RotmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(RotmTests, RealSinglePrecision) {
@@ -204,8 +204,8 @@ TEST_P(RotmTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotmTestSuite, RotmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotm_usm.cpp b/tests/unit_tests/blas/level1/rotm_usm.cpp
index 79ce634a1..0c33bb467 100644
--- a/tests/unit_tests/blas/level1/rotm_usm.cpp
+++ b/tests/unit_tests/blas/level1/rotm_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,26 +89,26 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::rotm(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::rotm(
                     main_queue, N, x.data(), incx, y.data(), incy, param.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::rotm(main_queue, N, x.data(), incx, y.data(),
-                                                          incy, param.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::rotm(main_queue, N, x.data(), incx, y.data(),
+                                                           incy, param.data(), dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotm, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotm, N,
                                         x.data(), incx, y.data(), incy, param.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotm, N, x.data(),
-                                        incx, y.data(), incy, param.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N,
+                                        x.data(), incx, y.data(), incy, param.data(), dependencies);
                 break;
             default: break;
         }
@@ -120,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -138,7 +138,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp
 }
 
 class RotmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(RotmUsmTests, RealSinglePrecision) {
     float flag(-1.0);
@@ -205,8 +205,8 @@ TEST_P(RotmUsmTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotmUsmTestSuite, RotmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotmg.cpp b/tests/unit_tests/blas/level1/rotmg.cpp
index f62bd1cf9..2d18311b7 100644
--- a/tests/unit_tests/blas/level1/rotmg.cpp
+++ b/tests/unit_tests/blas/level1/rotmg.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Prepare data.
     fp d1, d2, x1, y1, d1_ref, d2_ref, x1_ref;
     vector<fp> param(5, fp(0)), param_ref(5, fp(0));
@@ -89,24 +89,24 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer,
-                                                       y1, param_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer,
+                                                        y1, param_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer, y1,
-                                                    param_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer,
+                                                     y1, param_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotmg,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotmg,
                                         d1_buffer, d2_buffer, x1_buffer, y1, param_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotmg, d1_buffer,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotmg, d1_buffer,
                                         d2_buffer, x1_buffer, y1, param_buffer);
                 break;
             default: break;
@@ -118,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -187,8 +187,8 @@ int test(device* dev, oneapi::mkl::layout layout) {
     return (int)good;
 }
 
-class RotmgTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class RotmgTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(RotmgTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -201,8 +201,8 @@ TEST_P(RotmgTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotmgTestSuite, RotmgTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/rotmg_usm.cpp b/tests/unit_tests/blas/level1/rotmg_usm.cpp
index 0afe7caca..dcb79b2dd 100644
--- a/tests/unit_tests/blas/level1/rotmg_usm.cpp
+++ b/tests/unit_tests/blas/level1/rotmg_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, usm::alloc alloc_type = usm::alloc::shared>
-int test(device* dev, oneapi::mkl::layout layout) {
+int test(device* dev, oneapi::math::layout layout) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -82,14 +82,14 @@ int test(device* dev, oneapi::mkl::layout layout) {
 
     fp *d1_p, *d2_p, *x1_p;
     if constexpr (alloc_type == usm::alloc::device) {
-        d1_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
-        d2_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
-        x1_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt);
+        d1_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
+        d2_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
+        x1_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt);
     }
     else if constexpr (alloc_type == usm::alloc::shared) {
-        d1_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
-        d2_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
-        x1_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt);
+        d1_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
+        d2_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
+        x1_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt);
     }
     else {
         throw std::runtime_error("Bad alloc_type");
@@ -108,26 +108,26 @@ int test(device* dev, oneapi::mkl::layout layout) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1,
-                                                              param.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1,
+                                                               param.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1,
-                                                           param.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1,
+                                                            param.data(), dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::rotmg, d1_p,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotmg, d1_p,
                                         d2_p, x1_p, y1, param.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::rotmg, d1_p, d2_p,
-                                        x1_p, y1, param.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotmg, d1_p,
+                                        d2_p, x1_p, y1, param.data(), dependencies);
                 break;
             default: break;
         }
@@ -139,7 +139,7 @@ int test(device* dev, oneapi::mkl::layout layout) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -205,15 +205,15 @@ int test(device* dev, oneapi::mkl::layout layout) {
     bool good =
         good_d1 && good_d2 && good_x1 && flag_good && h11_good && h12_good && h21_good && h22_good;
 
-    oneapi::mkl::free_usm(d1_p, cxt);
-    oneapi::mkl::free_usm(d2_p, cxt);
-    oneapi::mkl::free_usm(x1_p, cxt);
+    oneapi::math::free_usm(d1_p, cxt);
+    oneapi::math::free_usm(d2_p, cxt);
+    oneapi::math::free_usm(x1_p, cxt);
 
     return (int)good;
 }
 
 class RotmgUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(RotmgUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam())));
@@ -230,8 +230,8 @@ TEST_P(RotmgUsmTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(RotmgUsmTestSuite, RotmgUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/scal.cpp b/tests/unit_tests/blas/level1/scal.cpp
index 8901bb424..27b10d2a4 100644
--- a/tests/unit_tests/blas/level1/scal.cpp
+++ b/tests/unit_tests/blas/level1/scal.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, fp_scalar alpha) {
     // Prepare data.
     vector<fp> x, x_ref;
 
@@ -55,11 +55,11 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
 
     // Call Reference SCAL.
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     const int N_ref = N, incx_ref = std::abs(incx);
 
-    ::scal(&N_ref, (fp_scalar_mkl*)&alpha, (fp_ref*)x_ref.data(), &incx_ref);
+    ::scal(&N_ref, (fp_scalar_ref*)&alpha, (fp_ref*)x_ref.data(), &incx_ref);
 
     // Call DPC++ SCAL.
 
@@ -84,22 +84,22 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::scal(main_queue, N, alpha, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::scal(main_queue, N, alpha, x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::scal(main_queue, N, alpha, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::scal(main_queue, N, alpha, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::scal, N, alpha,
-                                        x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N,
+                                        alpha, x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::scal, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::scal, N, alpha,
                                         x_buffer, incx);
                 break;
             default: break;
@@ -111,7 +111,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
     return (int)good;
 }
 
-class ScalTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class ScalTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(ScalTests, RealSinglePrecision) {
@@ -180,8 +180,8 @@ TEST_P(ScalTests, ComplexRealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ScalTestSuite, ScalTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/scal_usm.cpp b/tests/unit_tests/blas/level1/scal_usm.cpp
index e669deb2d..0da8f8ac4 100644
--- a/tests/unit_tests/blas/level1/scal_usm.cpp
+++ b/tests/unit_tests/blas/level1/scal_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, fp_scalar alpha) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -76,36 +76,36 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
 
     // Call Reference SCAL.
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     const int N_ref = N, incx_ref = std::abs(incx);
 
-    ::scal(&N_ref, (fp_scalar_mkl*)&alpha, (fp_ref*)x_ref.data(), &incx_ref);
+    ::scal(&N_ref, (fp_scalar_ref*)&alpha, (fp_ref*)x_ref.data(), &incx_ref);
 
     // Call DPC++ SCAL.
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::scal(main_queue, N, alpha, x.data(), incx,
-                                                             dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::scal(main_queue, N, alpha, x.data(), incx,
+                                                              dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::scal(main_queue, N, alpha, x.data(), incx,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::scal(main_queue, N, alpha, x.data(), incx,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::scal, N, alpha,
-                                        x.data(), incx, dependencies);
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N,
+                                        alpha, x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::scal, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::scal, N, alpha,
                                         x.data(), incx, dependencies);
                 break;
             default: break;
@@ -118,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,7 +134,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, fp_scalar alp
 }
 
 class ScalUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(ScalUsmTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -187,8 +187,8 @@ TEST_P(ScalUsmTests, ComplexRealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(ScalUsmTestSuite, ScalUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/sdsdot.cpp b/tests/unit_tests/blas/level1/sdsdot.cpp
index 1030713f0..d8ae45449 100644
--- a/tests/unit_tests/blas/level1/sdsdot.cpp
+++ b/tests/unit_tests/blas/level1/sdsdot.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -45,7 +45,7 @@ extern std::vector<sycl::device*> devices;
 
 namespace {
 
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) {
     // Prepare data.
     vector<float> x, y;
     float result = float(-1), result_ref = float(-1);
@@ -84,24 +84,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::sdsdot(main_queue, N, alpha, x_buffer, incx,
-                                                        y_buffer, incy, result_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::sdsdot(main_queue, N, alpha, x_buffer, incx,
+                                                         y_buffer, incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::sdsdot(main_queue, N, alpha, x_buffer, incx, y_buffer,
-                                                     incy, result_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::sdsdot(main_queue, N, alpha, x_buffer, incx,
+                                                      y_buffer, incy, result_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::sdsdot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::sdsdot, N,
                                         alpha, x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::sdsdot, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sdsdot, N, alpha,
                                         x_buffer, incx, y_buffer, incy, result_buffer);
                 break;
             default: break;
@@ -113,7 +113,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -130,7 +130,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
 }
 
 class SdsdotTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SdsdotTests, RealSinglePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -141,8 +141,8 @@ TEST_P(SdsdotTests, RealSinglePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SdsdotTestSuite, SdsdotTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/sdsdot_usm.cpp b/tests/unit_tests/blas/level1/sdsdot_usm.cpp
index ab0221754..f7153d38c 100644
--- a/tests/unit_tests/blas/level1/sdsdot_usm.cpp
+++ b/tests/unit_tests/blas/level1/sdsdot_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -45,7 +45,7 @@ extern std::vector<sycl::device*> devices;
 
 namespace {
 
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -81,31 +81,31 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
 
     // Call DPC++ SDSDOT.
 
-    auto result_p = (float*)oneapi::mkl::malloc_shared(64, sizeof(float), *dev, cxt);
+    auto result_p = (float*)oneapi::math::malloc_shared(64, sizeof(float), *dev, cxt);
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::sdsdot(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::sdsdot(
                     main_queue, N, alpha, x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::sdsdot(main_queue, N, alpha, x.data(), incx,
-                                                            y.data(), incy, result_p, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::sdsdot(
+                    main_queue, N, alpha, x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::sdsdot, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::sdsdot, N,
                                         alpha, x.data(), incx, y.data(), incy, result_p,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::sdsdot, N, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sdsdot, N, alpha,
                                         x.data(), incx, y.data(), incy, result_p, dependencies);
                 break;
             default: break;
@@ -118,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -129,13 +129,13 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo
     // Compare the results of reference implementation and DPC++ implementation.
 
     bool good = check_equal(*result_p, result_ref, N, std::cout);
-    oneapi::mkl::free_shared(result_p, cxt);
+    oneapi::math::free_shared(result_p, cxt);
 
     return (int)good;
 }
 
 class SdsdotUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SdsdotUsmTests, RealSinglePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -146,8 +146,8 @@ TEST_P(SdsdotUsmTests, RealSinglePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SdsdotUsmTestSuite, SdsdotUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/swap.cpp b/tests/unit_tests/blas/level1/swap.cpp
index 6c6721537..2fa39ff38 100644
--- a/tests/unit_tests/blas/level1/swap.cpp
+++ b/tests/unit_tests/blas/level1/swap.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Prepare data.
     vector<fp> x, x_ref, y, y_ref;
     rand_vector(x, N, incx);
@@ -84,24 +84,24 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::swap(main_queue, N, x_buffer, incx, y_buffer,
-                                                      incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::swap(main_queue, N, x_buffer, incx, y_buffer,
+                                                       incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::swap(main_queue, N, x_buffer, incx, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::swap(main_queue, N, x_buffer, incx, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::swap, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::swap, N,
                                         x_buffer, incx, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::swap, N, x_buffer,
-                                        incx, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N,
+                                        x_buffer, incx, y_buffer, incy);
                 break;
             default: break;
         }
@@ -112,7 +112,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,7 +131,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     return (int)good;
 }
 
-class SwapTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SwapTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SwapTests, RealSinglePrecision) {
@@ -167,8 +167,8 @@ TEST_P(SwapTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SwapTestSuite, SwapTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level1/swap_usm.cpp b/tests/unit_tests/blas/level1/swap_usm.cpp
index de20f3eb7..5d8b1e059 100644
--- a/tests/unit_tests/blas/level1/swap_usm.cpp
+++ b/tests/unit_tests/blas/level1/swap_usm.cpp
@@ -29,9 +29,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -46,7 +46,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
+int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -86,26 +86,26 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::swap(main_queue, N, x.data(), incx,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::swap(main_queue, N, x.data(), incx,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::swap(main_queue, N, x.data(), incx, y.data(),
-                                                          incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::swap(main_queue, N, x.data(), incx, y.data(),
+                                                           incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::swap, N,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::swap, N,
                                         x.data(), incx, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::swap, N, x.data(),
-                                        incx, y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N,
+                                        x.data(), incx, y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -117,7 +117,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,7 +135,7 @@ int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) {
 }
 
 class SwapUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SwapUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2, 3));
@@ -170,8 +170,8 @@ TEST_P(SwapUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(SwapUsmTestSuite, SwapUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/CMakeLists.txt b/tests/unit_tests/blas/level2/CMakeLists.txt
index 5ab82a8b5..8d7bc9d63 100644
--- a/tests/unit_tests/blas/level2/CMakeLists.txt
+++ b/tests/unit_tests/blas/level2/CMakeLists.txt
@@ -41,7 +41,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET blas_level2_rt SOURCES ${L2_SOURCES})
   else()
-    target_link_libraries(blas_level2_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(blas_level2_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -58,6 +58,6 @@ target_include_directories(blas_level2_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET blas_level2_ct SOURCES ${L2_SOURCES})
 else()
-  target_link_libraries(blas_level2_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(blas_level2_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
diff --git a/tests/unit_tests/blas/level2/gbmv.cpp b/tests/unit_tests/blas/level2/gbmv.cpp
index 20bc75490..096eaca44 100644
--- a/tests/unit_tests/blas/level2/gbmv.cpp
+++ b/tests/unit_tests/blas/level2/gbmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n,
          int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) {
     // Prepare data.
     int x_len = outer_dimension(transa, m, n);
@@ -58,7 +58,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     rand_vector(x, x_len, incx);
     rand_vector(y, y_len, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     // Call Reference GBMV.
     const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -94,27 +94,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
-                                                      A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                                      incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
+                                                       A_buffer, lda, x_buffer, incx, beta,
+                                                       y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
-                                                   A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                                   incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
+                                                    A_buffer, lda, x_buffer, incx, beta, y_buffer,
+                                                    incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gbmv, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gbmv, transa,
                                         m, n, kl, ku, alpha, A_buffer, lda, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gbmv, transa, m,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gbmv, transa, m,
                                         n, kl, ku, alpha, A_buffer, lda, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
@@ -127,7 +127,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,29 +142,29 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     return (int)good;
 }
 
-class GbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class GbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(GbmvTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2, 3,
-                                  42));
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
+                                  3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
                                   -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1,
-                                  42));
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
+                                  1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
                                   42));
 }
 TEST_P(GbmvTests, RealDoublePrecision) {
@@ -173,53 +173,53 @@ TEST_P(GbmvTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
                                    3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
                                    -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
                                    1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
-                                   42));
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2,
+                                   -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
                                    42));
 }
 TEST_P(GbmvTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, 1, 1, 42));
 }
 TEST_P(GbmvTests, ComplexDoublePrecision) {
@@ -228,38 +228,38 @@ TEST_P(GbmvTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, 2, 3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, -2, -3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, 1, 1, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(GbmvTestSuite, GbmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/gbmv_usm.cpp b/tests/unit_tests/blas/level2/gbmv_usm.cpp
index ea66daab4..ec121e723 100644
--- a/tests/unit_tests/blas/level2/gbmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/gbmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n,
          int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -76,7 +76,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 
     rand_vector(x, x_len, incx);
     rand_vector(y, y_len, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     auto y_ref = y;
 
@@ -94,28 +94,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku,
-                                                             alpha, A.data(), lda, x.data(), incx,
-                                                             beta, y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku,
+                                                              alpha, A.data(), lda, x.data(), incx,
+                                                              beta, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
-                                                          A.data(), lda, x.data(), incx, beta,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gbmv, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gbmv, transa,
                                         m, n, kl, ku, alpha, A.data(), lda, x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gbmv, transa, m,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gbmv, transa, m,
                                         n, kl, ku, alpha, A.data(), lda, x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,28 +145,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 }
 
 class GbmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GbmvUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2, 3,
-                                  42));
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
+                                  3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
                                   -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1,
-                                  42));
+                                  oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
+                                  1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
+                                  oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
                                   42));
 }
 TEST_P(GbmvUsmTests, RealDoublePrecision) {
@@ -175,53 +175,53 @@ TEST_P(GbmvUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2,
                                    3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2,
                                    -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
+                                   oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1,
                                    1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3,
-                                   42));
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2,
+                                   -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
+                                   oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1,
                                    42));
 }
 TEST_P(GbmvUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                 alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
+                                                oneapi::math::transpose::trans, 25, 30, 5, 7, alpha,
                                                 beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                 alpha, beta, 1, 1, 42));
 }
 TEST_P(GbmvUsmTests, ComplexDoublePrecision) {
@@ -230,38 +230,38 @@ TEST_P(GbmvUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::nontrans, 25, 30, 5, 7,
                                                  alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, 2, 3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, -2, -3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, 5, 7, alpha,
-                                                 beta, 1, 1, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, 5, 7,
+                                                 alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, 5, 7,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, 5, 7,
                                                  alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(GbmvUsmTestSuite, GbmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/gemv.cpp b/tests/unit_tests/blas/level2/gemv.cpp
index bd15ab54b..7d0999b1d 100644
--- a/tests/unit_tests/blas/level2/gemv.cpp
+++ b/tests/unit_tests/blas/level2/gemv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Prepare data.
     int x_len = outer_dimension(transa, m, n);
@@ -58,7 +58,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     rand_vector(x, x_len, incx);
     rand_vector(y, y_len, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     // Call Reference GEMV.
     const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -93,25 +93,25 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemv(main_queue, transa, m, n, alpha, A_buffer,
-                                                      lda, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemv(main_queue, transa, m, n, alpha, A_buffer,
+                                                       lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemv(main_queue, transa, m, n, alpha, A_buffer, lda,
-                                                   x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemv(main_queue, transa, m, n, alpha, A_buffer, lda,
+                                                    x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv, transa,
                                         m, n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
                                         incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemv, transa, m,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemv, transa, m,
                                         n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
                                         incy);
                 break;
@@ -124,7 +124,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -139,25 +139,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     return (int)good;
 }
 
-class GemvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class GemvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(GemvTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
+                                  42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
+                                  42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
 }
 TEST_P(GemvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -165,51 +167,52 @@ TEST_P(GemvTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3,
+                                   42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
 }
 
 TEST_P(GemvTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 2, 3, alpha, beta,
-                                                2, 3, 42));
+                                                oneapi::math::transpose::nontrans, 2, 3, alpha,
+                                                beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 2, 3, alpha, beta,
-                                                -2, -3, 42));
+                                                oneapi::math::transpose::nontrans, 2, 3, alpha,
+                                                beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 2, 3, alpha, beta,
-                                                1, 1, 42));
+                                                oneapi::math::transpose::nontrans, 2, 3, alpha,
+                                                beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 2, 3, alpha, beta, 2,
-                                                3, 42));
+                                                oneapi::math::transpose::trans, 2, 3, alpha, beta,
+                                                2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 2, 3, alpha, beta,
+                                                oneapi::math::transpose::trans, 2, 3, alpha, beta,
                                                 -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 2, 3, alpha, beta, 1,
-                                                1, 42));
+                                                oneapi::math::transpose::trans, 2, 3, alpha, beta,
+                                                1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 2, 3, alpha,
+                                                oneapi::math::transpose::conjtrans, 2, 3, alpha,
                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 2, 3, alpha,
+                                                oneapi::math::transpose::conjtrans, 2, 3, alpha,
                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 2, 3, alpha,
+                                                oneapi::math::transpose::conjtrans, 2, 3, alpha,
                                                 beta, 1, 1, 42));
 }
 
@@ -219,38 +222,38 @@ TEST_P(GemvTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 2, 3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 -2, -3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 1, 1, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemvTestSuite, GemvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/gemv_usm.cpp b/tests/unit_tests/blas/level2/gemv_usm.cpp
index a513ab149..c5f9d4dec 100644
--- a/tests/unit_tests/blas/level2/gemv_usm.cpp
+++ b/tests/unit_tests/blas/level2/gemv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -76,7 +76,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 
     rand_vector(x, x_len, incx);
     rand_vector(y, y_len, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     auto y_ref = y;
 
@@ -93,28 +93,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemv(main_queue, transa, m, n, alpha,
-                                                             A.data(), lda, x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemv(main_queue, transa, m, n, alpha,
+                                                              A.data(), lda, x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemv(main_queue, transa, m, n, alpha, A.data(),
-                                                          lda, x.data(), incx, beta, y.data(), incy,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemv(main_queue, transa, m, n, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv, transa,
                                         m, n, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
                                         incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemv, transa, m,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemv, transa, m,
                                         n, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
                                         incy, dependencies);
                 break;
@@ -128,7 +128,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -144,24 +144,26 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 }
 
 class GemvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemvUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
+                                  42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
                                   42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
+                                  42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
 }
 TEST_P(GemvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -169,50 +171,51 @@ TEST_P(GemvUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
+                                   oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1,
                                    42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3,
+                                   42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42));
 }
 TEST_P(GemvUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                 beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
+                                                oneapi::math::transpose::trans, 25, 30, alpha, beta,
                                                 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
+                                                oneapi::math::transpose::trans, 25, 30, alpha, beta,
                                                 -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
+                                                oneapi::math::transpose::trans, 25, 30, alpha, beta,
                                                 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                 beta, 1, 1, 42));
 }
 TEST_P(GemvUsmTests, ComplexDoublePrecision) {
@@ -221,38 +224,38 @@ TEST_P(GemvUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::nontrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::nontrans, 25, 30, alpha,
                                                  beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 2, 3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 -2, -3, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::trans, 25, 30, alpha, beta,
-                                                 1, 1, 42));
+                                                 oneapi::math::transpose::trans, 25, 30, alpha,
+                                                 beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::transpose::conjtrans, 25, 30, alpha,
+                                                 oneapi::math::transpose::conjtrans, 25, 30, alpha,
                                                  beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemvUsmTestSuite, GemvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/ger.cpp b/tests/unit_tests/blas/level2/ger.cpp
index 7610239ad..9c207a7c4 100644
--- a/tests/unit_tests/blas/level2/ger.cpp
+++ b/tests/unit_tests/blas/level2/ger.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Prepare data.
 
@@ -55,7 +55,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
     A_ref = A;
 
     // Call Reference GER.
@@ -90,24 +90,24 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::ger(main_queue, m, n, alpha, x_buffer, incx,
-                                                     y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::ger(main_queue, m, n, alpha, x_buffer, incx,
+                                                      y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::ger(main_queue, m, n, alpha, x_buffer, incx, y_buffer,
-                                                  incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::ger(main_queue, m, n, alpha, x_buffer, incx,
+                                                   y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::ger, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::ger, m, n,
                                         alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::ger, m, n, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::ger, m, n, alpha,
                                         x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,7 +135,8 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     return (int)good;
 }
 
-class GerTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class GerTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(GerTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -160,8 +161,8 @@ TEST_P(GerTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GerTestSuite, GerTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/ger_usm.cpp b/tests/unit_tests/blas/level2/ger_usm.cpp
index c9bece6b8..c0a523785 100644
--- a/tests/unit_tests/blas/level2/ger_usm.cpp
+++ b/tests/unit_tests/blas/level2/ger_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     auto A_ref = A;
 
@@ -90,28 +90,28 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::ger(main_queue, m, n, alpha, x.data(), incx,
-                                                            y.data(), incy, A.data(), lda,
-                                                            dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::ger(main_queue, m, n, alpha, x.data(),
+                                                             incx, y.data(), incy, A.data(), lda,
+                                                             dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
+            case oneapi::math::layout::row_major:
                 done =
-                    oneapi::mkl::blas::row_major::ger(main_queue, m, n, alpha, x.data(), incx,
-                                                      y.data(), incy, A.data(), lda, dependencies);
+                    oneapi::math::blas::row_major::ger(main_queue, m, n, alpha, x.data(), incx,
+                                                       y.data(), incy, A.data(), lda, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::ger, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::ger, m, n,
                                         alpha, x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::ger, m, n, alpha,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::ger, m, n, alpha,
                                         x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,7 +141,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 }
 
 class GerUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GerUsmTests, RealSinglePrecision) {
     float alpha(2.0);
@@ -166,8 +166,8 @@ TEST_P(GerUsmTests, RealDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GerUsmTestSuite, GerUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/gerc.cpp b/tests/unit_tests/blas/level2/gerc.cpp
index e918bbf92..8d89f8347 100644
--- a/tests/unit_tests/blas/level2/gerc.cpp
+++ b/tests/unit_tests/blas/level2/gerc.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Prepare data.
 
@@ -55,7 +55,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
     A_ref = A;
 
     // Call Reference GERC.
@@ -90,25 +90,25 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gerc(main_queue, m, n, alpha, x_buffer, incx,
-                                                      y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gerc(main_queue, m, n, alpha, x_buffer, incx,
+                                                       y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gerc(main_queue, m, n, alpha, x_buffer, incx,
-                                                   y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gerc(main_queue, m, n, alpha, x_buffer, incx,
+                                                    y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gerc, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gerc, m, n,
                                         alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gerc, m, n, alpha,
-                                        x_buffer, incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gerc, m, n,
+                                        alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,7 +135,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     return (int)good;
 }
 
-class GercTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class GercTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(GercTests, ComplexSinglePrecision) {
@@ -161,8 +161,8 @@ TEST_P(GercTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GercTestSuite, GercTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/gerc_usm.cpp b/tests/unit_tests/blas/level2/gerc_usm.cpp
index c9f04060d..a2d9d28cb 100644
--- a/tests/unit_tests/blas/level2/gerc_usm.cpp
+++ b/tests/unit_tests/blas/level2/gerc_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     auto A_ref = A;
 
@@ -90,29 +90,29 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gerc(main_queue, m, n, alpha, x.data(),
-                                                             incx, y.data(), incy, A.data(), lda,
-                                                             dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gerc(main_queue, m, n, alpha, x.data(),
+                                                              incx, y.data(), incy, A.data(), lda,
+                                                              dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done =
-                    oneapi::mkl::blas::row_major::gerc(main_queue, m, n, alpha, x.data(), incx,
-                                                       y.data(), incy, A.data(), lda, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gerc(main_queue, m, n, alpha, x.data(), incx,
+                                                           y.data(), incy, A.data(), lda,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gerc, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gerc, m, n,
                                         alpha, x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gerc, m, n, alpha,
-                                        x.data(), incx, y.data(), incy, A.data(), lda,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gerc, m, n,
+                                        alpha, x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
             default: break;
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,7 +141,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 }
 
 class GercUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GercUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
@@ -166,8 +166,8 @@ TEST_P(GercUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GercUsmTestSuite, GercUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/geru.cpp b/tests/unit_tests/blas/level2/geru.cpp
index 23af195cf..7a44656b1 100644
--- a/tests/unit_tests/blas/level2/geru.cpp
+++ b/tests/unit_tests/blas/level2/geru.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Prepare data.
 
@@ -55,7 +55,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
     A_ref = A;
 
     // Call Reference GERU.
@@ -90,25 +90,25 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::geru(main_queue, m, n, alpha, x_buffer, incx,
-                                                      y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::geru(main_queue, m, n, alpha, x_buffer, incx,
+                                                       y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::geru(main_queue, m, n, alpha, x_buffer, incx,
-                                                   y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::geru(main_queue, m, n, alpha, x_buffer, incx,
+                                                    y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::geru, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::geru, m, n,
                                         alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::geru, m, n, alpha,
-                                        x_buffer, incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::geru, m, n,
+                                        alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
@@ -119,7 +119,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,7 +135,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     return (int)good;
 }
 
-class GeruTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class GeruTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(GeruTests, ComplexSinglePrecision) {
@@ -161,8 +161,8 @@ TEST_P(GeruTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GeruTestSuite, GeruTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/geru_usm.cpp b/tests/unit_tests/blas/level2/geru_usm.cpp
index 31f4e2116..6bf155795 100644
--- a/tests/unit_tests/blas/level2/geru_usm.cpp
+++ b/tests/unit_tests/blas/level2/geru_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy,
+int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy,
          int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 
     rand_vector(x, m, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, n, lda);
 
     auto A_ref = A;
 
@@ -90,29 +90,29 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::geru(main_queue, m, n, alpha, x.data(),
-                                                             incx, y.data(), incy, A.data(), lda,
-                                                             dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::geru(main_queue, m, n, alpha, x.data(),
+                                                              incx, y.data(), incy, A.data(), lda,
+                                                              dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done =
-                    oneapi::mkl::blas::row_major::geru(main_queue, m, n, alpha, x.data(), incx,
-                                                       y.data(), incy, A.data(), lda, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::geru(main_queue, m, n, alpha, x.data(), incx,
+                                                           y.data(), incy, A.data(), lda,
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::geru, m, n,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::geru, m, n,
                                         alpha, x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::geru, m, n, alpha,
-                                        x.data(), incx, y.data(), incy, A.data(), lda,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::geru, m, n,
+                                        alpha, x.data(), incx, y.data(), incy, A.data(), lda,
                                         dependencies);
                 break;
             default: break;
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,7 +141,7 @@ int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in
 }
 
 class GeruUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GeruUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
@@ -166,8 +166,8 @@ TEST_P(GeruUsmTests, ComplexDoublePrecision) {
 
 INSTANTIATE_TEST_SUITE_P(GeruUsmTestSuite, GeruUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hbmv.cpp b/tests/unit_tests/blas/level2/hbmv.cpp
index aa2b51ffa..bd1fc71f2 100644
--- a/tests/unit_tests/blas/level2/hbmv.cpp
+++ b/tests/unit_tests/blas/level2/hbmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
@@ -55,7 +55,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     // Call Reference HBMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -91,28 +91,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha,
-                                                      A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                                      incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha,
+                                                       A_buffer, lda, x_buffer, incx, beta,
+                                                       y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha, A_buffer,
-                                                   lda, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha, A_buffer,
+                                                    lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hbmv,
                                         upper_lower, n, k, alpha, A_buffer, lda, x_buffer, incx,
                                         beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hbmv, upper_lower,
-                                        n, k, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                        incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv,
+                                        upper_lower, n, k, alpha, A_buffer, lda, x_buffer, incx,
+                                        beta, y_buffer, incy);
                 break;
             default: break;
         }
@@ -123,7 +123,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -138,29 +138,29 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class HbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(HbmvTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, -2,
                                                 -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, -2,
                                                 -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1,
                                                 42));
 }
 TEST_P(HbmvTests, ComplexDoublePrecision) {
@@ -169,29 +169,29 @@ TEST_P(HbmvTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3,
-                                                 42));
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, 2,
+                                                 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3,
-                                                 42));
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, 2,
+                                                 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2,
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, -2,
                                                  -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2,
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, -2,
                                                  -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1,
-                                                 42));
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, 1,
+                                                 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1,
-                                                 42));
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, 1,
+                                                 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(HbmvTestSuite, HbmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hbmv_usm.cpp b/tests/unit_tests/blas/level2/hbmv_usm.cpp
index 183dc9e28..325013029 100644
--- a/tests/unit_tests/blas/level2/hbmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/hbmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto y_ref = y;
 
@@ -92,30 +92,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha,
-                                                             A.data(), lda, x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha,
+                                                              A.data(), lda, x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha,
-                                                          A.data(), lda, x.data(), incx, beta,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hbmv,
                                         upper_lower, n, k, alpha, A.data(), lda, x.data(), incx,
                                         beta, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hbmv, upper_lower,
-                                        n, k, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
-                                        incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv,
+                                        upper_lower, n, k, alpha, A.data(), lda, x.data(), incx,
+                                        beta, y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -127,7 +127,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -143,28 +143,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HbmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HbmvUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, -2,
                                                 -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, -2,
                                                 -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1,
                                                 42));
 }
 TEST_P(HbmvUsmTests, ComplexDoublePrecision) {
@@ -173,29 +173,29 @@ TEST_P(HbmvUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3,
-                                                 42));
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, 2,
+                                                 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3,
-                                                 42));
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, 2,
+                                                 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2,
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, -2,
                                                  -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2,
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, -2,
                                                  -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1,
-                                                 42));
+                                                 oneapi::math::uplo::lower, 30, 5, alpha, beta, 1,
+                                                 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1,
-                                                 42));
+                                                 oneapi::math::uplo::upper, 30, 5, alpha, beta, 1,
+                                                 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(HbmvUsmTestSuite, HbmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hemv.cpp b/tests/unit_tests/blas/level2/hemv.cpp
index 5e68db394..26d71956c 100644
--- a/tests/unit_tests/blas/level2/hemv.cpp
+++ b/tests/unit_tests/blas/level2/hemv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
@@ -55,7 +55,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     // Call Reference HEMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -90,27 +90,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hemv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                      lda, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hemv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                       lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hemv(main_queue, upper_lower, n, alpha, A_buffer, lda,
-                                                   x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hemv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                    lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hemv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hemv,
                                         upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hemv, upper_lower,
-                                        n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                        incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv,
+                                        upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta,
+                                        y_buffer, incy);
                 break;
             default: break;
         }
@@ -121,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,29 +136,29 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HemvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class HemvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(HemvTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, -2, -3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, -2, -3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 1, 1,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 1, 1,
                                                 42));
 }
 TEST_P(HemvTests, ComplexDoublePrecision) {
@@ -167,29 +167,29 @@ TEST_P(HemvTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 2, 3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 2, 3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, -2, -3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, -2, -3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 1, 1,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 1, 1,
                                                  42));
 }
 
 INSTANTIATE_TEST_SUITE_P(HemvTestSuite, HemvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hemv_usm.cpp b/tests/unit_tests/blas/level2/hemv_usm.cpp
index a5c20b4b9..3ca3fd7cf 100644
--- a/tests/unit_tests/blas/level2/hemv_usm.cpp
+++ b/tests/unit_tests/blas/level2/hemv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto y_ref = y;
 
@@ -91,30 +91,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hemv(main_queue, upper_lower, n, alpha,
-                                                             A.data(), lda, x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hemv(main_queue, upper_lower, n, alpha,
+                                                              A.data(), lda, x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hemv(main_queue, upper_lower, n, alpha,
-                                                          A.data(), lda, x.data(), incx, beta,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hemv(main_queue, upper_lower, n, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hemv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hemv,
                                         upper_lower, n, alpha, A.data(), lda, x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hemv, upper_lower,
-                                        n, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
-                                        incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv,
+                                        upper_lower, n, alpha, A.data(), lda, x.data(), incx, beta,
+                                        y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,28 +142,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HemvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HemvUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 2, 3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, -2, -3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, -2, -3,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 1, 1,
                                                 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1,
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 1, 1,
                                                 42));
 }
 TEST_P(HemvUsmTests, ComplexDoublePrecision) {
@@ -172,29 +172,29 @@ TEST_P(HemvUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 2, 3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 2, 3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, -2, -3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, -2, -3,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 1, 1,
                                                  42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 1, 1,
                                                  42));
 }
 
 INSTANTIATE_TEST_SUITE_P(HemvUsmTestSuite, HemvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/her.cpp b/tests/unit_tests/blas/level2/her.cpp
index 8b0e77cf2..eaeb82048 100644
--- a/tests/unit_tests/blas/level2/her.cpp
+++ b/tests/unit_tests/blas/level2/her.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,21 +47,21 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n,
          fp_scalar alpha, int incx, int lda) {
     // Prepare data.
     vector<fp> x, A_ref, A;
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
     A_ref = A;
 
     // Call Reference HER.
     const int n_ref = n, incx_ref = incx, lda_ref = lda;
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref,
-          (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref);
+          (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref);
 
     // Call DPC++ HER.
 
@@ -87,24 +87,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::her(main_queue, upper_lower, n, alpha, x_buffer,
-                                                     incx, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::her(main_queue, upper_lower, n, alpha, x_buffer,
+                                                      incx, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::her(main_queue, upper_lower, n, alpha, x_buffer, incx,
-                                                  A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::her(main_queue, upper_lower, n, alpha, x_buffer,
+                                                   incx, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her,
                                         upper_lower, n, alpha, x_buffer, incx, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her, upper_lower,
                                         n, alpha, x_buffer, incx, A_buffer, lda);
                 break;
             default: break;
@@ -116,7 +116,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,28 +131,29 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HerTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class HerTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(HerTests, ComplexSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, 2, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, 2, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, -2, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, -2, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, 1, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, 1, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, 1, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, 1, 42)));
 }
 TEST_P(HerTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -160,28 +161,28 @@ TEST_P(HerTests, ComplexDoublePrecision) {
     double alpha(2.0);
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, 2, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, 2, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, -2, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, -2, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, 1, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, 1, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, 1, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, 1, 42)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HerTestSuite, HerTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/her2.cpp b/tests/unit_tests/blas/level2/her2.cpp
index 9da2be96a..6ab26b315 100644
--- a/tests/unit_tests/blas/level2/her2.cpp
+++ b/tests/unit_tests/blas/level2/her2.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,14 +47,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, A_ref, A;
 
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
     A_ref = A;
 
     // Call Reference HER2.
@@ -90,26 +90,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::her2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                      incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::her2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                       incx, y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::her2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                   incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::her2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                    incx, y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her2,
                                         upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
                                         A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her2, upper_lower,
-                                        n, alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2,
+                                        upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
+                                        A_buffer, lda);
                 break;
             default: break;
         }
@@ -120,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,46 +136,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Her2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class Her2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(Her2Tests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 TEST_P(Her2Tests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(Her2TestSuite, Her2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/her2_usm.cpp b/tests/unit_tests/blas/level2/her2_usm.cpp
index 6d65f18f4..1fb29c132 100644
--- a/tests/unit_tests/blas/level2/her2_usm.cpp
+++ b/tests/unit_tests/blas/level2/her2_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -74,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto A_ref = A;
 
@@ -91,30 +91,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::her2(main_queue, upper_lower, n, alpha,
-                                                             x.data(), incx, y.data(), incy,
-                                                             A.data(), lda, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::her2(main_queue, upper_lower, n, alpha,
+                                                              x.data(), incx, y.data(), incy,
+                                                              A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::her2(main_queue, upper_lower, n, alpha,
-                                                          x.data(), incx, y.data(), incy, A.data(),
-                                                          lda, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::her2(main_queue, upper_lower, n, alpha,
+                                                           x.data(), incx, y.data(), incy, A.data(),
+                                                           lda, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her2,
                                         upper_lower, n, alpha, x.data(), incx, y.data(), incy,
                                         A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her2, upper_lower,
-                                        n, alpha, x.data(), incx, y.data(), incy, A.data(), lda,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2,
+                                        upper_lower, n, alpha, x.data(), incx, y.data(), incy,
+                                        A.data(), lda, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,45 +142,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Her2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Her2UsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                                oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                                oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 TEST_P(Her2UsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                                 oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                                 oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(Her2UsmTestSuite, Her2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/her_usm.cpp b/tests/unit_tests/blas/level2/her_usm.cpp
index 083bd4f28..fa614dd25 100644
--- a/tests/unit_tests/blas/level2/her_usm.cpp
+++ b/tests/unit_tests/blas/level2/her_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n,
          fp_scalar alpha, int incx, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,29 +72,29 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> x(ua), A(ua);
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto A_ref = A;
 
     // Call Reference HER.
     const int n_ref = n, incx_ref = incx, lda_ref = lda;
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref,
-          (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref);
+          (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref);
 
     // Call DPC++ HER.
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::her(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::her(
                     main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::her(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::her(
                     main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
             default: break;
@@ -102,13 +102,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her,
                                         upper_lower, n, alpha, x.data(), incx, A.data(), lda,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her, upper_lower,
                                         n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
             default: break;
@@ -121,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,28 +137,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HerUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HerUsmTests, ComplexSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, 2, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, 2, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, -2, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, -2, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, 1, 42)));
+                                          oneapi::math::uplo::lower, 30, alpha, 1, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, 1, 42)));
+                                          oneapi::math::uplo::upper, 30, alpha, 1, 42)));
 }
 TEST_P(HerUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -166,28 +166,28 @@ TEST_P(HerUsmTests, ComplexDoublePrecision) {
     double alpha(2.0);
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, 2, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, 2, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, 2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, -2, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, -2, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, -2, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, 1, 42)));
+                                            oneapi::math::uplo::lower, 30, alpha, 1, 42)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, 1, 42)));
+                                            oneapi::math::uplo::upper, 30, alpha, 1, 42)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HerUsmTestSuite, HerUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpmv.cpp b/tests/unit_tests/blas/level2/hpmv.cpp
index 23f6c4d91..08b221536 100644
--- a/tests/unit_tests/blas/level2/hpmv.cpp
+++ b/tests/unit_tests/blas/level2/hpmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,14 +47,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     // Call Reference HPMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy;
@@ -89,26 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                      x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                       x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                   x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                    x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpmv,
                                         upper_lower, n, alpha, A_buffer, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpmv, upper_lower,
-                                        n, alpha, A_buffer, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv,
+                                        upper_lower, n, alpha, A_buffer, x_buffer, incx, beta,
+                                        y_buffer, incy);
                 break;
             default: break;
         }
@@ -119,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,24 +135,26 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class HpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(HpmvTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, -2,
+                                                -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, -2,
+                                                -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 TEST_P(HpmvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -159,25 +162,25 @@ TEST_P(HpmvTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, -2,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, -2,
                                                  -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, -2,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, -2,
                                                  -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(HpmvTestSuite, HpmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpmv_usm.cpp b/tests/unit_tests/blas/level2/hpmv_usm.cpp
index 3766c7e7d..b81adfccb 100644
--- a/tests/unit_tests/blas/level2/hpmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/hpmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto y_ref = y;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hpmv(main_queue, upper_lower, n, alpha,
-                                                             A.data(), x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hpmv(main_queue, upper_lower, n, alpha,
+                                                              A.data(), x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hpmv(main_queue, upper_lower, n, alpha,
-                                                          A.data(), x.data(), incx, beta, y.data(),
-                                                          incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hpmv(main_queue, upper_lower, n, alpha,
+                                                           A.data(), x.data(), incx, beta, y.data(),
+                                                           incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpmv,
                                         upper_lower, n, alpha, A.data(), x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpmv, upper_lower,
-                                        n, alpha, A.data(), x.data(), incx, beta, y.data(), incy,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv,
+                                        upper_lower, n, alpha, A.data(), x.data(), incx, beta,
+                                        y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,23 +141,25 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HpmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HpmvUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, -2,
+                                                -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, -2,
+                                                -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                                oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                                oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 TEST_P(HpmvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -165,25 +167,25 @@ TEST_P(HpmvUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, -2,
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, -2,
                                                  -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, -2,
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, -2,
                                                  -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                                 oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                                 oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(HpmvUsmTestSuite, HpmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpr.cpp b/tests/unit_tests/blas/level2/hpr.cpp
index ca79e335a..492c07457 100644
--- a/tests/unit_tests/blas/level2/hpr.cpp
+++ b/tests/unit_tests/blas/level2/hpr.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,21 +47,21 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n,
          fp_scalar alpha, int incx) {
     // Prepare data.
     vector<fp> x, A_ref, A;
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
     A_ref = A;
 
     // Call Reference HPR.
     const int n_ref = n, incx_ref = incx;
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref,
-          (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data());
+          (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data());
 
     // Call DPC++ HPR.
 
@@ -87,24 +87,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hpr(main_queue, upper_lower, n, alpha, x_buffer,
-                                                     incx, A_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hpr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                      incx, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hpr(main_queue, upper_lower, n, alpha, x_buffer, incx,
-                                                  A_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hpr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                   incx, A_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpr,
                                         upper_lower, n, alpha, x_buffer, incx, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr, upper_lower,
                                         n, alpha, x_buffer, incx, A_buffer);
                 break;
             default: break;
@@ -116,7 +116,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -131,50 +131,59 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HprTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class HprTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(HprTests, ComplexSinglePrecision) {
     float alpha(2.0);
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 2)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, -2)));
+                                          oneapi::math::uplo::lower, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, -2)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 1)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 1)));
+                                          oneapi::math::uplo::upper, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::lower, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::upper, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::lower, 30, alpha, 1)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::upper, 30, alpha, 1)));
 }
 
 TEST_P(HprTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 2)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, -2)));
+                                            oneapi::math::uplo::lower, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::upper, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::lower, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::upper, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::lower, 30, alpha, 1)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, -2)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 1)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 1)));
+                                            oneapi::math::uplo::upper, 30, alpha, 1)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HprTestSuite, HprTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpr2.cpp b/tests/unit_tests/blas/level2/hpr2.cpp
index 22701fe3d..d8780b576 100644
--- a/tests/unit_tests/blas/level2/hpr2.cpp
+++ b/tests/unit_tests/blas/level2/hpr2.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,13 +47,13 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy) {
     // Prepare data.
     vector<fp> x, y, A_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
     A_ref = A;
 
     // Call Reference HPR2.
@@ -89,26 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                      incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                       incx, y_buffer, incy, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                   incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                    incx, y_buffer, incy, A_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpr2,
                                         upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
                                         A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpr2, upper_lower,
-                                        n, alpha, x_buffer, incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2,
+                                        upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
+                                        A_buffer);
                 break;
             default: break;
         }
@@ -119,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,46 +135,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Hpr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class Hpr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(Hpr2Tests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                                oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                                oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                                oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                                oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                                oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                                oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 TEST_P(Hpr2Tests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                                 oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                                 oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                                 oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                                 oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                                 oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                                 oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(Hpr2TestSuite, Hpr2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpr2_usm.cpp b/tests/unit_tests/blas/level2/hpr2_usm.cpp
index 392f9a74b..6a9d4c55e 100644
--- a/tests/unit_tests/blas/level2/hpr2_usm.cpp
+++ b/tests/unit_tests/blas/level2/hpr2_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto A_ref = A;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hpr2(main_queue, upper_lower, n, alpha,
-                                                             x.data(), incx, y.data(), incy,
-                                                             A.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hpr2(main_queue, upper_lower, n, alpha,
+                                                              x.data(), incx, y.data(), incy,
+                                                              A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hpr2(main_queue, upper_lower, n, alpha,
-                                                          x.data(), incx, y.data(), incy, A.data(),
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hpr2(main_queue, upper_lower, n, alpha,
+                                                           x.data(), incx, y.data(), incy, A.data(),
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpr2,
                                         upper_lower, n, alpha, x.data(), incx, y.data(), incy,
                                         A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpr2, upper_lower,
-                                        n, alpha, x.data(), incx, y.data(), incy, A.data(),
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2,
+                                        upper_lower, n, alpha, x.data(), incx, y.data(), incy,
+                                        A.data(), dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,45 +141,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Hpr2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Hpr2UsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                                oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                                oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                                oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                                oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                                oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                                oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 TEST_P(Hpr2UsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                                 oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                                 oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                                 oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                                 oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                                 oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                                 oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(Hpr2UsmTestSuite, Hpr2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/hpr_usm.cpp b/tests/unit_tests/blas/level2/hpr_usm.cpp
index 708018e6d..a0e60c9d2 100644
--- a/tests/unit_tests/blas/level2/hpr_usm.cpp
+++ b/tests/unit_tests/blas/level2/hpr_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n,
          fp_scalar alpha, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,43 +72,43 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> x(ua), A(ua);
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto A_ref = A;
 
     // Call Reference HPR.
     const int n_ref = n, incx_ref = incx;
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref,
-          (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data());
+          (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data());
 
     // Call DPC++ HPR.
 
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hpr(main_queue, upper_lower, n, alpha,
-                                                            x.data(), incx, A.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hpr(
+                    main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hpr(main_queue, upper_lower, n, alpha,
-                                                         x.data(), incx, A.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hpr(main_queue, upper_lower, n, alpha,
+                                                          x.data(), incx, A.data(), dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hpr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hpr,
                                         upper_lower, n, alpha, x.data(), incx, A.data(),
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hpr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr, upper_lower,
                                         n, alpha, x.data(), incx, A.data(), dependencies);
                 break;
             default: break;
@@ -121,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,50 +137,58 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HprUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HprUsmTests, ComplexSinglePrecision) {
     float alpha(2.0);
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 2)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::lower, 30, alpha, -2)));
+                                          oneapi::math::uplo::lower, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                          oneapi::mkl::uplo::upper, 30, alpha, -2)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 1)));
-    EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 1)));
+                                          oneapi::math::uplo::upper, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::lower, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::upper, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::lower, 30, alpha, 1)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<float>, float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                          oneapi::math::uplo::upper, 30, alpha, 1)));
 }
 
 TEST_P(HprUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 2)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 2)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::lower, 30, alpha, -2)));
+                                            oneapi::math::uplo::lower, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::upper, 30, alpha, 2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::lower, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::upper, 30, alpha, -2)));
+    EXPECT_TRUEORSKIP(
+        (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                            oneapi::math::uplo::lower, 30, alpha, 1)));
     EXPECT_TRUEORSKIP(
         (test<std::complex<double>, double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                            oneapi::mkl::uplo::upper, 30, alpha, -2)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower, 30, alpha, 1)));
-    EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper, 30, alpha, 1)));
+                                            oneapi::math::uplo::upper, 30, alpha, 1)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HprUsmTestSuite, HprUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/sbmv.cpp b/tests/unit_tests/blas/level2/sbmv.cpp
index 49df93ad1..64f3a71bd 100644
--- a/tests/unit_tests/blas/level2/sbmv.cpp
+++ b/tests/unit_tests/blas/level2/sbmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,14 +47,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     // Call Reference SBMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -89,28 +89,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha,
-                                                      A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                                      incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha,
+                                                       A_buffer, lda, x_buffer, incx, beta,
+                                                       y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha, A_buffer,
-                                                   lda, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha, A_buffer,
+                                                    lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::sbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::sbmv,
                                         upper_lower, n, k, alpha, A_buffer, lda, x_buffer, incx,
                                         beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::sbmv, upper_lower,
-                                        n, k, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                        incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv,
+                                        upper_lower, n, k, alpha, A_buffer, lda, x_buffer, incx,
+                                        beta, y_buffer, incy);
                 break;
             default: break;
         }
@@ -121,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,24 +136,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SbmvTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
 }
 TEST_P(SbmvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -161,23 +161,23 @@ TEST_P(SbmvTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SbmvTestSuite, SbmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/sbmv_usm.cpp b/tests/unit_tests/blas/level2/sbmv_usm.cpp
index 43093cb24..fd31e5285 100644
--- a/tests/unit_tests/blas/level2/sbmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/sbmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k,
          fp alpha, fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto y_ref = y;
 
@@ -91,30 +91,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha,
-                                                             A.data(), lda, x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha,
+                                                              A.data(), lda, x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha,
-                                                          A.data(), lda, x.data(), incx, beta,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::sbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::sbmv,
                                         upper_lower, n, k, alpha, A.data(), lda, x.data(), incx,
                                         beta, y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::sbmv, upper_lower,
-                                        n, k, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
-                                        incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv,
+                                        upper_lower, n, k, alpha, A.data(), lda, x.data(), incx,
+                                        beta, y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,23 +142,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SbmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SbmvUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
 }
 TEST_P(SbmvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -166,23 +166,23 @@ TEST_P(SbmvUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SbmvUsmTestSuite, SbmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spmv.cpp b/tests/unit_tests/blas/level2/spmv.cpp
index a2121fbac..9935a61da 100644
--- a/tests/unit_tests/blas/level2/spmv.cpp
+++ b/tests/unit_tests/blas/level2/spmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,14 +47,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     // Call Reference SPMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy;
@@ -89,26 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::spmv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                      x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::spmv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                       x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::spmv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                   x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::spmv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                    x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spmv,
                                         upper_lower, n, alpha, A_buffer, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spmv, upper_lower,
-                                        n, alpha, A_buffer, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv,
+                                        upper_lower, n, alpha, A_buffer, x_buffer, incx, beta,
+                                        y_buffer, incy);
                 break;
             default: break;
         }
@@ -119,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,24 +135,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SpmvTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 TEST_P(SpmvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -159,23 +160,23 @@ TEST_P(SpmvTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(SpmvTestSuite, SpmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spmv_usm.cpp b/tests/unit_tests/blas/level2/spmv_usm.cpp
index 9dfe57383..703c25232 100644
--- a/tests/unit_tests/blas/level2/spmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/spmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto y_ref = y;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::spmv(main_queue, upper_lower, n, alpha,
-                                                             A.data(), x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::spmv(main_queue, upper_lower, n, alpha,
+                                                              A.data(), x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::spmv(main_queue, upper_lower, n, alpha,
-                                                          A.data(), x.data(), incx, beta, y.data(),
-                                                          incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::spmv(main_queue, upper_lower, n, alpha,
+                                                           A.data(), x.data(), incx, beta, y.data(),
+                                                           incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spmv,
                                         upper_lower, n, alpha, A.data(), x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spmv, upper_lower,
-                                        n, alpha, A.data(), x.data(), incx, beta, y.data(), incy,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv,
+                                        upper_lower, n, alpha, A.data(), x.data(), incx, beta,
+                                        y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,23 +141,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SpmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SpmvUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 TEST_P(SpmvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -165,23 +165,23 @@ TEST_P(SpmvUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 1, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(SpmvUsmTestSuite, SpmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spr.cpp b/tests/unit_tests/blas/level2/spr.cpp
index 05b809f45..456335f24 100644
--- a/tests/unit_tests/blas/level2/spr.cpp
+++ b/tests/unit_tests/blas/level2/spr.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,12 +47,12 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx) {
     // Prepare data.
     vector<fp> x, A_ref, A;
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
     A_ref = A;
 
     // Call Reference SPR.
@@ -86,24 +86,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::spr(main_queue, upper_lower, n, alpha, x_buffer,
-                                                     incx, A_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::spr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                      incx, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::spr(main_queue, upper_lower, n, alpha, x_buffer, incx,
-                                                  A_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::spr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                   incx, A_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spr,
                                         upper_lower, n, alpha, x_buffer, incx, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr, upper_lower,
                                         n, alpha, x_buffer, incx, A_buffer);
                 break;
             default: break;
@@ -115,7 +115,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -130,45 +130,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SprTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class SprTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(SprTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2));
+                                  oneapi::math::uplo::lower, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2));
+                                  oneapi::math::uplo::upper, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2));
+                                  oneapi::math::uplo::lower, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2));
+                                  oneapi::math::uplo::upper, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, 1));
 }
 TEST_P(SprTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2));
+                                   oneapi::math::uplo::lower, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2));
+                                   oneapi::math::uplo::upper, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2));
+                                   oneapi::math::uplo::lower, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2));
+                                   oneapi::math::uplo::upper, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(SprTestSuite, SprTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spr2.cpp b/tests/unit_tests/blas/level2/spr2.cpp
index bbb232f5c..78cfce411 100644
--- a/tests/unit_tests/blas/level2/spr2.cpp
+++ b/tests/unit_tests/blas/level2/spr2.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,13 +47,13 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy) {
     // Prepare data.
     vector<fp> x, y, A_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
     A_ref = A;
 
     // Call Reference SPR2.
@@ -89,26 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::spr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                      incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::spr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                       incx, y_buffer, incy, A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::spr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                   incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::spr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                    incx, y_buffer, incy, A_buffer);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spr2,
                                         upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
                                         A_buffer);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spr2, upper_lower,
-                                        n, alpha, x_buffer, incx, y_buffer, incy, A_buffer);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr2,
+                                        upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
+                                        A_buffer);
                 break;
             default: break;
         }
@@ -119,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,46 +135,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Spr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class Spr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(Spr2Tests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 TEST_P(Spr2Tests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(Spr2TestSuite, Spr2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spr2_usm.cpp b/tests/unit_tests/blas/level2/spr2_usm.cpp
index 4a029015f..ec283b22e 100644
--- a/tests/unit_tests/blas/level2/spr2_usm.cpp
+++ b/tests/unit_tests/blas/level2/spr2_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto A_ref = A;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::spr2(main_queue, upper_lower, n, alpha,
-                                                             x.data(), incx, y.data(), incy,
-                                                             A.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::spr2(main_queue, upper_lower, n, alpha,
+                                                              x.data(), incx, y.data(), incy,
+                                                              A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::spr2(main_queue, upper_lower, n, alpha,
-                                                          x.data(), incx, y.data(), incy, A.data(),
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::spr2(main_queue, upper_lower, n, alpha,
+                                                           x.data(), incx, y.data(), incy, A.data(),
+                                                           dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spr2,
                                         upper_lower, n, alpha, x.data(), incx, y.data(), incy,
                                         A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spr2, upper_lower,
-                                        n, alpha, x.data(), incx, y.data(), incy, A.data(),
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr2,
+                                        upper_lower, n, alpha, x.data(), incx, y.data(), incy,
+                                        A.data(), dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,45 +141,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Spr2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Spr2UsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 TEST_P(Spr2UsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 3));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 3));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, -3));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, -3));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, -3));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(Spr2UsmTestSuite, Spr2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/spr_usm.cpp b/tests/unit_tests/blas/level2/spr_usm.cpp
index e81aa41d9..e70cbfd74 100644
--- a/tests/unit_tests/blas/level2/spr_usm.cpp
+++ b/tests/unit_tests/blas/level2/spr_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> x(ua), A(ua);
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, n);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, n);
 
     auto A_ref = A;
 
@@ -88,26 +88,26 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::spr(main_queue, upper_lower, n, alpha,
-                                                            x.data(), incx, A.data(), dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::spr(
+                    main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::spr(main_queue, upper_lower, n, alpha,
-                                                         x.data(), incx, A.data(), dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::spr(main_queue, upper_lower, n, alpha,
+                                                          x.data(), incx, A.data(), dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::spr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::spr,
                                         upper_lower, n, alpha, x.data(), incx, A.data(),
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::spr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr, upper_lower,
                                         n, alpha, x.data(), incx, A.data(), dependencies);
                 break;
             default: break;
@@ -120,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,45 +136,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SprUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SprUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2));
+                                  oneapi::math::uplo::lower, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2));
+                                  oneapi::math::uplo::upper, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2));
+                                  oneapi::math::uplo::lower, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2));
+                                  oneapi::math::uplo::upper, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1));
+                                  oneapi::math::uplo::lower, 30, alpha, 1));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1));
+                                  oneapi::math::uplo::upper, 30, alpha, 1));
 }
 TEST_P(SprUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2));
+                                   oneapi::math::uplo::lower, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2));
+                                   oneapi::math::uplo::upper, 30, alpha, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2));
+                                   oneapi::math::uplo::lower, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2));
+                                   oneapi::math::uplo::upper, 30, alpha, -2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1));
+                                   oneapi::math::uplo::lower, 30, alpha, 1));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1));
+                                   oneapi::math::uplo::upper, 30, alpha, 1));
 }
 
 INSTANTIATE_TEST_SUITE_P(SprUsmTestSuite, SprUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/symv.cpp b/tests/unit_tests/blas/level2/symv.cpp
index fb33d8914..6e136c771 100644
--- a/tests/unit_tests/blas/level2/symv.cpp
+++ b/tests/unit_tests/blas/level2/symv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,14 +47,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, y_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
     y_ref = y;
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     // Call Reference SYMV.
     const int n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda;
@@ -89,27 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::symv(main_queue, upper_lower, n, alpha, A_buffer,
-                                                      lda, x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::symv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                       lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::symv(main_queue, upper_lower, n, alpha, A_buffer, lda,
-                                                   x_buffer, incx, beta, y_buffer, incy);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::symv(main_queue, upper_lower, n, alpha, A_buffer,
+                                                    lda, x_buffer, incx, beta, y_buffer, incy);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::symv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::symv,
                                         upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta,
                                         y_buffer, incy);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::symv, upper_lower,
-                                        n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer,
-                                        incy);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv,
+                                        upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta,
+                                        y_buffer, incy);
                 break;
             default: break;
         }
@@ -120,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -135,24 +135,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SymvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SymvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SymvTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 1, 1, 42));
 }
 TEST_P(SymvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -160,23 +160,23 @@ TEST_P(SymvTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SymvTestSuite, SymvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/symv_usm.cpp b/tests/unit_tests/blas/level2/symv_usm.cpp
index 8cfff4f39..def858041 100644
--- a/tests/unit_tests/blas/level2/symv_usm.cpp
+++ b/tests/unit_tests/blas/level2/symv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          fp beta, int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto y_ref = y;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::symv(main_queue, upper_lower, n, alpha,
-                                                             A.data(), lda, x.data(), incx, beta,
-                                                             y.data(), incy, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::symv(main_queue, upper_lower, n, alpha,
+                                                              A.data(), lda, x.data(), incx, beta,
+                                                              y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::symv(main_queue, upper_lower, n, alpha,
-                                                          A.data(), lda, x.data(), incx, beta,
-                                                          y.data(), incy, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::symv(main_queue, upper_lower, n, alpha,
+                                                           A.data(), lda, x.data(), incx, beta,
+                                                           y.data(), incy, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::symv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::symv,
                                         upper_lower, n, alpha, A.data(), lda, x.data(), incx, beta,
                                         y.data(), incy, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::symv, upper_lower,
-                                        n, alpha, A.data(), lda, x.data(), incx, beta, y.data(),
-                                        incy, dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv,
+                                        upper_lower, n, alpha, A.data(), lda, x.data(), incx, beta,
+                                        y.data(), incy, dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,23 +141,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SymvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SymvUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, beta, 1, 1, 42));
 }
 TEST_P(SymvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -165,23 +165,23 @@ TEST_P(SymvUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, beta, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, beta, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, beta, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SymvUsmTestSuite, SymvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/syr.cpp b/tests/unit_tests/blas/level2/syr.cpp
index f382749da..11679eabc 100644
--- a/tests/unit_tests/blas/level2/syr.cpp
+++ b/tests/unit_tests/blas/level2/syr.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,12 +47,12 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int lda) {
     // Prepare data.
     vector<fp> x, A_ref, A;
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
     A_ref = A;
 
     // Call Reference SYR.
@@ -86,24 +86,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::syr(main_queue, upper_lower, n, alpha, x_buffer,
-                                                     incx, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::syr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                      incx, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::syr(main_queue, upper_lower, n, alpha, x_buffer, incx,
-                                                  A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::syr(main_queue, upper_lower, n, alpha, x_buffer,
+                                                   incx, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr,
                                         upper_lower, n, alpha, x_buffer, incx, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr, upper_lower,
                                         n, alpha, x_buffer, incx, A_buffer, lda);
                 break;
             default: break;
@@ -115,7 +115,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -130,45 +130,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SyrTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+class SyrTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
+};
 
 TEST_P(SyrTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 42));
 }
 TEST_P(SyrTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SyrTestSuite, SyrTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/syr2.cpp b/tests/unit_tests/blas/level2/syr2.cpp
index ef96572e5..622d5eeac 100644
--- a/tests/unit_tests/blas/level2/syr2.cpp
+++ b/tests/unit_tests/blas/level2/syr2.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,13 +47,13 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy, int lda) {
     // Prepare data.
     vector<fp> x, y, A_ref, A;
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
     A_ref = A;
 
     // Call Reference SYR2.
@@ -89,26 +89,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::syr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                      incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::syr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                       incx, y_buffer, incy, A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::syr2(main_queue, upper_lower, n, alpha, x_buffer,
-                                                   incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::syr2(main_queue, upper_lower, n, alpha, x_buffer,
+                                                    incx, y_buffer, incy, A_buffer, lda);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr2,
                                         upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
                                         A_buffer, lda);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr2, upper_lower,
-                                        n, alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2,
+                                        upper_lower, n, alpha, x_buffer, incx, y_buffer, incy,
+                                        A_buffer, lda);
                 break;
             default: break;
         }
@@ -119,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,46 +135,46 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Syr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class Syr2Tests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(Syr2Tests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 TEST_P(Syr2Tests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(Syr2TestSuite, Syr2Tests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/syr2_usm.cpp b/tests/unit_tests/blas/level2/syr2_usm.cpp
index 64db524f6..c56fc8647 100644
--- a/tests/unit_tests/blas/level2/syr2_usm.cpp
+++ b/tests/unit_tests/blas/level2/syr2_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int incy, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> x(ua), y(ua), A(ua);
     rand_vector(x, n, incx);
     rand_vector(y, n, incy);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto A_ref = A;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syr2(main_queue, upper_lower, n, alpha,
-                                                             x.data(), incx, y.data(), incy,
-                                                             A.data(), lda, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syr2(main_queue, upper_lower, n, alpha,
+                                                              x.data(), incx, y.data(), incy,
+                                                              A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syr2(main_queue, upper_lower, n, alpha,
-                                                          x.data(), incx, y.data(), incy, A.data(),
-                                                          lda, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syr2(main_queue, upper_lower, n, alpha,
+                                                           x.data(), incx, y.data(), incy, A.data(),
+                                                           lda, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr2,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr2,
                                         upper_lower, n, alpha, x.data(), incx, y.data(), incy,
                                         A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr2, upper_lower,
-                                        n, alpha, x.data(), incx, y.data(), incy, A.data(), lda,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2,
+                                        upper_lower, n, alpha, x.data(), incx, y.data(), incy,
+                                        A.data(), lda, dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,45 +141,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Syr2UsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Syr2UsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 TEST_P(Syr2UsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, -3, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, -3, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, -3, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(Syr2UsmTestSuite, Syr2UsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/syr_usm.cpp b/tests/unit_tests/blas/level2/syr_usm.cpp
index c6b652d24..96992b8fa 100644
--- a/tests/unit_tests/blas/level2/syr_usm.cpp
+++ b/tests/unit_tests/blas/level2/syr_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,7 +47,7 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha,
          int incx, int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> x(ua), A(ua);
     rand_vector(x, n, incx);
-    rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
+    rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
 
     auto A_ref = A;
 
@@ -88,12 +88,12 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syr(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syr(
                     main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syr(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syr(
                     main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
             default: break;
@@ -101,13 +101,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr,
                                         upper_lower, n, alpha, x.data(), incx, A.data(), lda,
                                         dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr, upper_lower,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr, upper_lower,
                                         n, alpha, x.data(), incx, A.data(), lda, dependencies);
                 break;
             default: break;
@@ -120,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,45 +136,45 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SyrUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SyrUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 2, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 2, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, -2, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, -2, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, 30, alpha, 1, 42));
+                                  oneapi::math::uplo::lower, 30, alpha, 1, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, 30, alpha, 1, 42));
+                                  oneapi::math::uplo::upper, 30, alpha, 1, 42));
 }
 TEST_P(SyrUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 2, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 2, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, -2, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, -2, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, -2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, 30, alpha, 1, 42));
+                                   oneapi::math::uplo::lower, 30, alpha, 1, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, 30, alpha, 1, 42));
+                                   oneapi::math::uplo::upper, 30, alpha, 1, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(SyrUsmTestSuite, SyrUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tbmv.cpp b/tests/unit_tests/blas/level2/tbmv.cpp
index 554082a01..f2e601b42 100644
--- a/tests/unit_tests/blas/level2/tbmv.cpp
+++ b/tests/unit_tests/blas/level2/tbmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int k, int incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int k, int incx,
          int lda) {
     // Prepare data.
     vector<fp> x, x_ref, A;
@@ -89,26 +89,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::tbmv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::tbmv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, k, A_buffer, lda, x_buffer,
+                                                       incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::tbmv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::tbmv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, k, A_buffer, lda, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tbmv,
                                         upper_lower, transa, unit_nonunit, n, k, A_buffer, lda,
                                         x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tbmv, upper_lower,
-                                        transa, unit_nonunit, n, k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv,
+                                        upper_lower, transa, unit_nonunit, n, k, A_buffer, lda,
+                                        x_buffer, incx);
                 break;
             default: break;
         }
@@ -119,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,146 +136,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TbmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TbmvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TbmvTestSuite, TbmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tbmv_usm.cpp b/tests/unit_tests/blas/level2/tbmv_usm.cpp
index 808c5d1c3..f64cef35b 100644
--- a/tests/unit_tests/blas/level2/tbmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/tbmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int k, int incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int k, int incx,
          int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -91,30 +91,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::tbmv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, k, A.data(), lda,
-                                                             x.data(), incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::tbmv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, k, A.data(), lda,
+                                                              x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::tbmv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, k, A.data(), lda,
-                                                          x.data(), incx, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::tbmv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, k, A.data(), lda,
+                                                           x.data(), incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tbmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tbmv,
                                         upper_lower, transa, unit_nonunit, n, k, A.data(), lda,
                                         x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tbmv, upper_lower,
-                                        transa, unit_nonunit, n, k, A.data(), lda, x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv,
+                                        upper_lower, transa, unit_nonunit, n, k, A.data(), lda,
+                                        x.data(), incx, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,145 +142,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TbmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TbmvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbmvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TbmvUsmTestSuite, TbmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tbsv.cpp b/tests/unit_tests/blas/level2/tbsv.cpp
index e653105e8..5747091d5 100644
--- a/tests/unit_tests/blas/level2/tbsv.cpp
+++ b/tests/unit_tests/blas/level2/tbsv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int k, int incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int k, int incx,
          int lda) {
     // Prepare data.
     vector<fp> x, x_ref, A;
@@ -89,26 +89,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::tbsv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::tbsv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, k, A_buffer, lda, x_buffer,
+                                                       incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::tbsv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::tbsv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, k, A_buffer, lda, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tbsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tbsv,
                                         upper_lower, transa, unit_nonunit, n, k, A_buffer, lda,
                                         x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tbsv, upper_lower,
-                                        transa, unit_nonunit, n, k, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv,
+                                        upper_lower, transa, unit_nonunit, n, k, A_buffer, lda,
+                                        x_buffer, incx);
                 break;
             default: break;
         }
@@ -119,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -134,146 +136,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TbsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TbsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TbsvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TbsvTestSuite, TbsvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tbsv_usm.cpp b/tests/unit_tests/blas/level2/tbsv_usm.cpp
index 1b77997eb..20b8a947e 100644
--- a/tests/unit_tests/blas/level2/tbsv_usm.cpp
+++ b/tests/unit_tests/blas/level2/tbsv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int k, int incx,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int k, int incx,
          int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -91,30 +91,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::tbsv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, k, A.data(), lda,
-                                                             x.data(), incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::tbsv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, k, A.data(), lda,
+                                                              x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::tbsv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, k, A.data(), lda,
-                                                          x.data(), incx, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::tbsv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, k, A.data(), lda,
+                                                           x.data(), incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tbsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tbsv,
                                         upper_lower, transa, unit_nonunit, n, k, A.data(), lda,
                                         x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tbsv, upper_lower,
-                                        transa, unit_nonunit, n, k, A.data(), lda, x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv,
+                                        upper_lower, transa, unit_nonunit, n, k, A.data(), lda,
+                                        x.data(), incx, dependencies);
                 break;
             default: break;
         }
@@ -126,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -142,145 +142,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TbsvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TbsvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 TEST_P(TbsvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 5, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 5, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TbsvUsmTestSuite, TbsvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tpmv.cpp b/tests/unit_tests/blas/level2/tpmv.cpp
index ce45279bb..b52a50656 100644
--- a/tests/unit_tests/blas/level2/tpmv.cpp
+++ b/tests/unit_tests/blas/level2/tpmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx) {
     // Prepare data.
     vector<fp> x, x_ref, A;
     rand_vector(x, n, incx);
@@ -87,26 +87,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::tpmv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, A_buffer, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::tpmv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, A_buffer, x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::tpmv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   A_buffer, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::tpmv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, A_buffer, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tpmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tpmv,
                                         upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer,
                                         incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tpmv, upper_lower,
-                                        transa, unit_nonunit, n, A_buffer, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv,
+                                        upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer,
+                                        incx);
                 break;
             default: break;
         }
@@ -117,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -132,146 +133,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TpmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TpmvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 
 INSTANTIATE_TEST_SUITE_P(TpmvTestSuite, TpmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tpmv_usm.cpp b/tests/unit_tests/blas/level2/tpmv_usm.cpp
index 74ebc2502..40722a09c 100644
--- a/tests/unit_tests/blas/level2/tpmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/tpmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,30 +89,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::tpmv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, A.data(), x.data(),
-                                                             incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::tpmv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, A.data(), x.data(),
+                                                              incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::tpmv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, A.data(), x.data(), incx,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::tpmv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, A.data(), x.data(),
+                                                           incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tpmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tpmv,
                                         upper_lower, transa, unit_nonunit, n, A.data(), x.data(),
                                         incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tpmv, upper_lower,
-                                        transa, unit_nonunit, n, A.data(), x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv,
+                                        upper_lower, transa, unit_nonunit, n, A.data(), x.data(),
+                                        incx, dependencies);
                 break;
             default: break;
         }
@@ -124,7 +124,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -140,145 +140,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TpmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TpmvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpmvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 
 INSTANTIATE_TEST_SUITE_P(TpmvUsmTestSuite, TpmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tpsv.cpp b/tests/unit_tests/blas/level2/tpsv.cpp
index 2a12ab1da..daebf6d58 100644
--- a/tests/unit_tests/blas/level2/tpsv.cpp
+++ b/tests/unit_tests/blas/level2/tpsv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx) {
     // Prepare data.
     vector<fp> x, x_ref, A;
     rand_vector(x, n, incx);
@@ -87,26 +87,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::tpsv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, A_buffer, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::tpsv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, A_buffer, x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::tpsv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   A_buffer, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::tpsv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, A_buffer, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tpsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tpsv,
                                         upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer,
                                         incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tpsv, upper_lower,
-                                        transa, unit_nonunit, n, A_buffer, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv,
+                                        upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer,
+                                        incx);
                 break;
             default: break;
         }
@@ -117,7 +118,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -132,146 +133,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TpsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TpsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TpsvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 
 INSTANTIATE_TEST_SUITE_P(TpsvTestSuite, TpsvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/tpsv_usm.cpp b/tests/unit_tests/blas/level2/tpsv_usm.cpp
index bcb676843..a41d0e3a0 100644
--- a/tests/unit_tests/blas/level2/tpsv_usm.cpp
+++ b/tests/unit_tests/blas/level2/tpsv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,30 +89,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::tpsv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, A.data(), x.data(),
-                                                             incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::tpsv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, A.data(), x.data(),
+                                                              incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::tpsv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, A.data(), x.data(), incx,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::tpsv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, A.data(), x.data(),
+                                                           incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::tpsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::tpsv,
                                         upper_lower, transa, unit_nonunit, n, A.data(), x.data(),
                                         incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::tpsv, upper_lower,
-                                        transa, unit_nonunit, n, A.data(), x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv,
+                                        upper_lower, transa, unit_nonunit, n, A.data(), x.data(),
+                                        incx, dependencies);
                 break;
             default: break;
         }
@@ -124,7 +124,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -140,145 +140,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TpsvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TpsvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 TEST_P(TpsvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2));
 }
 
 INSTANTIATE_TEST_SUITE_P(TpsvUsmTestSuite, TpsvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/trmv.cpp b/tests/unit_tests/blas/level2/trmv.cpp
index 8dfc517eb..1a99590cd 100644
--- a/tests/unit_tests/blas/level2/trmv.cpp
+++ b/tests/unit_tests/blas/level2/trmv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx, int lda) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx,
+         int lda) {
     // Prepare data.
     vector<fp> x, x_ref, A;
     rand_vector(x, n, incx);
@@ -87,26 +88,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::trmv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::trmv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, A_buffer, lda, x_buffer,
+                                                       incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::trmv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::trmv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, A_buffer, lda, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trmv,
                                         upper_lower, transa, unit_nonunit, n, A_buffer, lda,
                                         x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trmv, upper_lower,
-                                        transa, unit_nonunit, n, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv,
+                                        upper_lower, transa, unit_nonunit, n, A_buffer, lda,
+                                        x_buffer, incx);
                 break;
             default: break;
         }
@@ -117,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -132,146 +135,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TrmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TrmvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TrmvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrmvTestSuite, TrmvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/trmv_usm.cpp b/tests/unit_tests/blas/level2/trmv_usm.cpp
index af3e4b898..d11d9a68a 100644
--- a/tests/unit_tests/blas/level2/trmv_usm.cpp
+++ b/tests/unit_tests/blas/level2/trmv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx, int lda) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx,
+         int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trmv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, A.data(), lda,
-                                                             x.data(), incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trmv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, A.data(), lda,
+                                                              x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trmv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, A.data(), lda, x.data(),
-                                                          incx, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trmv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, A.data(), lda, x.data(),
+                                                           incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trmv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trmv,
                                         upper_lower, transa, unit_nonunit, n, A.data(), lda,
                                         x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trmv, upper_lower,
-                                        transa, unit_nonunit, n, A.data(), lda, x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv,
+                                        upper_lower, transa, unit_nonunit, n, A.data(), lda,
+                                        x.data(), incx, dependencies);
                 break;
             default: break;
         }
@@ -124,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -140,145 +141,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TrmvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrmvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrmvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrmvUsmTestSuite, TrmvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/trsv.cpp b/tests/unit_tests/blas/level2/trsv.cpp
index fb1e39e06..05c64e97b 100644
--- a/tests/unit_tests/blas/level2/trsv.cpp
+++ b/tests/unit_tests/blas/level2/trsv.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx, int lda) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx,
+         int lda) {
     // Prepare data.
     vector<fp> x, x_ref, A;
     rand_vector(x, n, incx);
@@ -87,26 +88,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::trsv(main_queue, upper_lower, transa, unit_nonunit,
-                                                      n, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::trsv(main_queue, upper_lower, transa,
+                                                       unit_nonunit, n, A_buffer, lda, x_buffer,
+                                                       incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::trsv(main_queue, upper_lower, transa, unit_nonunit, n,
-                                                   A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::trsv(main_queue, upper_lower, transa, unit_nonunit,
+                                                    n, A_buffer, lda, x_buffer, incx);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsv,
                                         upper_lower, transa, unit_nonunit, n, A_buffer, lda,
                                         x_buffer, incx);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsv, upper_lower,
-                                        transa, unit_nonunit, n, A_buffer, lda, x_buffer, incx);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv,
+                                        upper_lower, transa, unit_nonunit, n, A_buffer, lda,
+                                        x_buffer, incx);
                 break;
             default: break;
         }
@@ -117,7 +120,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -132,146 +135,146 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class TrsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TrsvTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TrsvTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrsvTestSuite, TrsvTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level2/trsv_usm.cpp b/tests/unit_tests/blas/level2/trsv_usm.cpp
index 2e6242d58..98dbb5063 100644
--- a/tests/unit_tests/blas/level2/trsv_usm.cpp
+++ b/tests/unit_tests/blas/level2/trsv_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose transa, oneapi::mkl::diag unit_nonunit, int n, int incx, int lda) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx,
+         int lda) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -89,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trsv(main_queue, upper_lower, transa,
-                                                             unit_nonunit, n, A.data(), lda,
-                                                             x.data(), incx, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trsv(main_queue, upper_lower, transa,
+                                                              unit_nonunit, n, A.data(), lda,
+                                                              x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trsv(main_queue, upper_lower, transa,
-                                                          unit_nonunit, n, A.data(), lda, x.data(),
-                                                          incx, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trsv(main_queue, upper_lower, transa,
+                                                           unit_nonunit, n, A.data(), lda, x.data(),
+                                                           incx, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsv,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsv,
                                         upper_lower, transa, unit_nonunit, n, A.data(), lda,
                                         x.data(), incx, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsv, upper_lower,
-                                        transa, unit_nonunit, n, A.data(), lda, x.data(), incx,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv,
+                                        upper_lower, transa, unit_nonunit, n, A.data(), lda,
+                                        x.data(), incx, dependencies);
                 break;
             default: break;
         }
@@ -124,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -140,145 +141,145 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class TrsvUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrsvUsmTests, RealSinglePrecision) {
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::unit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                  oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                  oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::unit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans,
-                                   oneapi::mkl::diag::nonunit, 30, 2, 42));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans,
+                                   oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvUsmTests, ComplexSinglePrecision) {
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 TEST_P(TrsvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::unit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 30, 2, 42));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 30, 2, 42));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrsvUsmTestSuite, TrsvUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/CMakeLists.txt b/tests/unit_tests/blas/level3/CMakeLists.txt
index ad6d2f273..ee2e8e3f8 100644
--- a/tests/unit_tests/blas/level3/CMakeLists.txt
+++ b/tests/unit_tests/blas/level3/CMakeLists.txt
@@ -41,7 +41,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET blas_level3_rt SOURCES ${L3_SOURCES})
   else()
-    target_link_libraries(blas_level3_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(blas_level3_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -58,7 +58,7 @@ target_include_directories(blas_level3_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET blas_level3_ct SOURCES ${L3_SOURCES})
 else()
-  target_link_libraries(blas_level3_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(blas_level3_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
 
diff --git a/tests/unit_tests/blas/level3/gemm.cpp b/tests/unit_tests/blas/level3/gemm.cpp
index 564700b16..0350cdb1a 100644
--- a/tests/unit_tests/blas/level3/gemm.cpp
+++ b/tests/unit_tests/blas/level3/gemm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,8 +48,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ta, typename Tc>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
-         oneapi::mkl::transpose transb, int m, int n, int k, int lda, int ldb, int ldc, Tc alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa,
+         oneapi::math::transpose transb, int m, int n, int k, int lda, int ldb, int ldc, Tc alpha,
          Tc beta) {
     // Prepare data.
     vector<Ta, allocator_helper<Ta, 64>> A, B;
@@ -57,7 +57,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 
     rand_matrix(A, layout, transa, m, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
     C_ref = C;
 
     // Call Reference GEMM.
@@ -97,27 +97,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::gemm(main_queue, transa, transb, m, n, k, alpha,
-                                                      A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                      ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::gemm(main_queue, transa, transb, m, n, k, alpha,
+                                                       A_buffer, lda, B_buffer, ldb, beta, C_buffer,
+                                                       ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::gemm(main_queue, transa, transb, m, n, k, alpha,
-                                                   A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                   ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::gemm(main_queue, transa, transb, m, n, k, alpha,
+                                                    A_buffer, lda, B_buffer, ldb, beta, C_buffer,
+                                                    ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm, transa,
                                         transb, m, n, k, alpha, A_buffer, lda, B_buffer, ldb, beta,
                                         C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm, transa,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm, transa,
                                         transb, m, n, k, alpha, A_buffer, lda, B_buffer, ldb, beta,
                                         C_buffer, ldc);
                 break;
@@ -130,7 +130,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,78 +145,78 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     return (int)good;
 }
 
-class GemmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class GemmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(GemmTests, Bfloat16Bfloat16FloatPrecision) {
     float alpha(2.0);
     float beta(3.0);
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, HalfHalfFloatPrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, RealHalfPrecision) {
     sycl::half alpha(2.0);
     sycl::half beta(3.0);
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 3, 8, 9, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 3, 8, 9, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, RealDoublePrecision) {
@@ -225,49 +225,49 @@ TEST_P(GemmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmTests, ComplexDoublePrecision) {
@@ -276,38 +276,38 @@ TEST_P(GemmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmTestSuite, GemmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/gemm_usm.cpp b/tests/unit_tests/blas/level3/gemm_usm.cpp
index 9d5d8d048..a18e79d79 100644
--- a/tests/unit_tests/blas/level3/gemm_usm.cpp
+++ b/tests/unit_tests/blas/level3/gemm_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename Ta, typename Tc>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
-         oneapi::mkl::transpose transb, int m, int n, int k, int lda, int ldb, int ldc, Tc alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa,
+         oneapi::math::transpose transb, int m, int n, int k, int lda, int ldb, int ldc, Tc alpha,
          Tc beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -76,7 +76,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     vector<Tc, decltype(uc)> C(ua);
     rand_matrix(A, layout, transa, m, k, lda);
     rand_matrix(B, layout, transb, k, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
 
     auto C_ref = C;
 
@@ -97,28 +97,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::gemm(main_queue, transa, transb, m, n, k,
-                                                             alpha, A.data(), lda, B.data(), ldb,
-                                                             beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::gemm(main_queue, transa, transb, m, n, k,
+                                                              alpha, A.data(), lda, B.data(), ldb,
+                                                              beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::gemm(main_queue, transa, transb, m, n, k,
-                                                          alpha, A.data(), lda, B.data(), ldb, beta,
-                                                          C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::gemm(main_queue, transa, transb, m, n, k,
+                                                           alpha, A.data(), lda, B.data(), ldb,
+                                                           beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm, transa,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm, transa,
                                         transb, m, n, k, alpha, A.data(), lda, B.data(), ldb, beta,
                                         C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm, transa,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm, transa,
                                         transb, m, n, k, alpha, A.data(), lda, B.data(), ldb, beta,
                                         C.data(), ldc, dependencies);
                 break;
@@ -132,7 +132,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -148,74 +148,74 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa,
 }
 
 class GemmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(GemmUsmTests, Bfloat16Bfloat16FloatPrecision) {
     float alpha(2.0);
     float beta(3.0);
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
-    EXPECT_TRUEORSKIP((test<oneapi::mkl::bfloat16, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+    EXPECT_TRUEORSKIP((test<oneapi::math::bfloat16, float>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, HalfHalfFloatPrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, RealHalfPrecision) {
     sycl::half alpha(2.0);
     sycl::half beta(3.0);
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<sycl::half, sycl::half>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<float, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, RealDoublePrecision) {
@@ -224,49 +224,49 @@ TEST_P(GemmUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<double, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 TEST_P(GemmUsmTests, ComplexDoublePrecision) {
@@ -275,38 +275,38 @@ TEST_P(GemmUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::nontrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::trans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::nontrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::trans, 79, 83, 91, 103, 105, 106, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::transpose::conjtrans,
-        oneapi::mkl::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans,
+        oneapi::math::transpose::conjtrans, 79, 83, 91, 103, 105, 106, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(GemmUsmTestSuite, GemmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/hemm.cpp b/tests/unit_tests/blas/level3/hemm.cpp
index ce050e97d..6886195cf 100644
--- a/tests/unit_tests/blas/level3/hemm.cpp
+++ b/tests/unit_tests/blas/level3/hemm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,17 +48,17 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
          fp beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, C, C_ref;
-    if (left_right == oneapi::mkl::side::left)
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, m, lda);
+    if (left_right == oneapi::math::side::left)
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, m, lda);
     else
-        rand_matrix(A, oneapi::mkl::transpose::nontrans, n, n, lda);
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
     C_ref = C;
 
     // Call Reference HEMM.
@@ -96,27 +96,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::hemm(main_queue, left_right, upper_lower, m, n,
-                                                      alpha, A_buffer, lda, B_buffer, ldb, beta,
-                                                      C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::hemm(main_queue, left_right, upper_lower, m, n,
+                                                       alpha, A_buffer, lda, B_buffer, ldb, beta,
+                                                       C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n, alpha,
-                                                   A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                   ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n,
+                                                    alpha, A_buffer, lda, B_buffer, ldb, beta,
+                                                    C_buffer, ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hemm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hemm,
                                         left_right, upper_lower, m, n, alpha, A_buffer, lda,
                                         B_buffer, ldb, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hemm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemm, left_right,
                                         upper_lower, m, n, alpha, A_buffer, lda, B_buffer, ldb,
                                         beta, C_buffer, ldc);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,48 +145,48 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     return (int)good;
 }
 
-class HemmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class HemmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(HemmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
                                                 72, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(HemmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(HemmTestSuite, HemmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/hemm_usm.cpp b/tests/unit_tests/blas/level3/hemm_usm.cpp
index eafb06ea5..f898dc002 100644
--- a/tests/unit_tests/blas/level3/hemm_usm.cpp
+++ b/tests/unit_tests/blas/level3/hemm_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
          fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,12 +72,12 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     // Prepare data.
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), B(ua), C(ua);
-    if (left_right == oneapi::mkl::side::left)
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, m, lda);
+    if (left_right == oneapi::math::side::left)
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, m, lda);
     else
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
 
     auto C_ref = C;
 
@@ -95,28 +95,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::hemm(main_queue, left_right, upper_lower, m,
-                                                             n, alpha, A.data(), lda, B.data(), ldb,
-                                                             beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::hemm(
+                    main_queue, left_right, upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb,
+                    beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n,
-                                                          alpha, A.data(), lda, B.data(), ldb, beta,
-                                                          C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m,
+                                                           n, alpha, A.data(), lda, B.data(), ldb,
+                                                           beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::hemm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::hemm,
                                         left_right, upper_lower, m, n, alpha, A.data(), lda,
                                         B.data(), ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::hemm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemm, left_right,
                                         upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb,
                                         beta, C.data(), ldc, dependencies);
                 break;
@@ -130,7 +130,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -146,47 +146,47 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class HemmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HemmUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
                                                 72, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(HemmUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(HemmUsmTestSuite, HemmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/her2k.cpp b/tests/unit_tests/blas/level3/her2k.cpp
index ce57041d9..9df00b280 100644
--- a/tests/unit_tests/blas/level3/her2k.cpp
+++ b/tests/unit_tests/blas/level3/her2k.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,8 +48,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
          fp_scalar beta) {
     fp alpha_row(alpha.real(), -alpha.imag());
 
@@ -57,7 +57,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, allocator_helper<fp, 64>> A, B, C, C_ref;
     rand_matrix(A, layout, trans, n, k, lda);
     rand_matrix(B, layout, trans, n, k, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     C_ref = C;
 
     // Call Reference HER2K.
@@ -65,11 +65,11 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     const int lda_ref = lda, ldb_ref = ldb, ldc_ref = ldc;
 
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::her2k(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower),
             convert_to_cblas_trans(trans), &n_ref, &k_ref, (fp_ref*)&alpha, (fp_ref*)A.data(),
-            &lda_ref, (fp_ref*)B.data(), &ldb_ref, (fp_scalar_mkl*)&beta, (fp_ref*)C_ref.data(),
+            &lda_ref, (fp_ref*)B.data(), &ldb_ref, (fp_scalar_ref*)&beta, (fp_ref*)C_ref.data(),
             &ldc_ref);
 
     // Call DPC++ HER2K.
@@ -97,27 +97,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::her2k(main_queue, upper_lower, trans, n, k, alpha,
-                                                       A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                       ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::her2k(main_queue, upper_lower, trans, n, k, alpha,
+                                                        A_buffer, lda, B_buffer, ldb, beta,
+                                                        C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::her2k(main_queue, upper_lower, trans, n, k, alpha,
-                                                    A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                    ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::her2k(main_queue, upper_lower, trans, n, k, alpha,
+                                                     A_buffer, lda, B_buffer, ldb, beta, C_buffer,
+                                                     ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her2k,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her2k,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, B_buffer,
                                         ldb, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her2k,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2k,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, B_buffer,
                                         ldb, beta, C_buffer, ldc);
                 break;
@@ -130,7 +130,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -146,24 +146,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Her2kTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class Her2kTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Her2kTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     float beta(1.0);
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
 }
 TEST_P(Her2kTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -171,23 +171,23 @@ TEST_P(Her2kTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     double beta(1.0);
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(Her2kTestSuite, Her2kTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/her2k_usm.cpp b/tests/unit_tests/blas/level3/her2k_usm.cpp
index a4ada6cb2..c975104cf 100644
--- a/tests/unit_tests/blas/level3/her2k_usm.cpp
+++ b/tests/unit_tests/blas/level3/her2k_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
          fp_scalar beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -76,7 +76,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> A(ua), B(ua), C(ua);
     rand_matrix(A, layout, trans, n, k, lda);
     rand_matrix(B, layout, trans, n, k, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
 
     auto C_ref = C;
 
@@ -85,11 +85,11 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     const int lda_ref = lda, ldb_ref = ldb, ldc_ref = ldc;
 
     using fp_ref = typename ref_type_info<fp>::type;
-    using fp_scalar_mkl = typename ref_type_info<fp_scalar>::type;
+    using fp_scalar_ref = typename ref_type_info<fp_scalar>::type;
 
     ::her2k(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower),
             convert_to_cblas_trans(trans), &n_ref, &k_ref, (fp_ref*)&alpha, (fp_ref*)A.data(),
-            &lda_ref, (fp_ref*)B.data(), &ldb_ref, (fp_scalar_mkl*)&beta, (fp_ref*)C_ref.data(),
+            &lda_ref, (fp_ref*)B.data(), &ldb_ref, (fp_scalar_ref*)&beta, (fp_ref*)C_ref.data(),
             &ldc_ref);
 
     // Call DPC++ HER2K.
@@ -97,28 +97,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::her2k(main_queue, upper_lower, trans, n, k,
-                                                              alpha, A.data(), lda, B.data(), ldb,
-                                                              beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::her2k(main_queue, upper_lower, trans, n, k,
+                                                               alpha, A.data(), lda, B.data(), ldb,
+                                                               beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::her2k(main_queue, upper_lower, trans, n, k,
-                                                           alpha, A.data(), lda, B.data(), ldb,
-                                                           beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::her2k(main_queue, upper_lower, trans, n, k,
+                                                            alpha, A.data(), lda, B.data(), ldb,
+                                                            beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::her2k,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::her2k,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, B.data(),
                                         ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::her2k,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2k,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, B.data(),
                                         ldb, beta, C.data(), ldc, dependencies);
                 break;
@@ -132,7 +132,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -148,23 +148,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Her2kUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Her2kUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     float beta(1.0);
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
 }
 TEST_P(Her2kUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -172,23 +172,23 @@ TEST_P(Her2kUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(2.0, -0.5);
     double beta(1.0);
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 102, 103, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(Her2kUsmTestSuite, Her2kUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/herk.cpp b/tests/unit_tests/blas/level3/herk.cpp
index f908a77b7..9c0e858b2 100644
--- a/tests/unit_tests/blas/level3/herk.cpp
+++ b/tests/unit_tests/blas/level3/herk.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,13 +48,13 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldc, fp_scalar alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldc, fp_scalar alpha,
          fp_scalar beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, C, C_ref;
     rand_matrix(A, layout, trans, n, k, lda);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     C_ref = C;
 
     // Call Reference HERK.
@@ -91,26 +91,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::herk(main_queue, upper_lower, trans, n, k, alpha,
-                                                      A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::herk(main_queue, upper_lower, trans, n, k, alpha,
+                                                       A_buffer, lda, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::herk(main_queue, upper_lower, trans, n, k, alpha,
-                                                   A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::herk(main_queue, upper_lower, trans, n, k, alpha,
+                                                    A_buffer, lda, beta, C_buffer, ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::herk,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::herk,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, beta,
                                         C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::herk, upper_lower,
-                                        trans, n, k, alpha, A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk,
+                                        upper_lower, trans, n, k, alpha, A_buffer, lda, beta,
+                                        C_buffer, ldc);
                 break;
             default: break;
         }
@@ -121,7 +122,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -137,24 +138,24 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class HerkTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class HerkTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(HerkTests, ComplexSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
 }
 TEST_P(HerkTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -162,23 +163,23 @@ TEST_P(HerkTests, ComplexDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HerkTestSuite, HerkTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/herk_usm.cpp b/tests/unit_tests/blas/level3/herk_usm.cpp
index 470159c63..82e37cb22 100644
--- a/tests/unit_tests/blas/level3/herk_usm.cpp
+++ b/tests/unit_tests/blas/level3/herk_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp, typename fp_scalar>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldc, fp_scalar alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldc, fp_scalar alpha,
          fp_scalar beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -73,7 +73,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), C(ua);
     rand_matrix(A, layout, trans, n, k, lda);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
 
     auto C_ref = C;
 
@@ -92,30 +92,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::herk(main_queue, upper_lower, trans, n, k,
-                                                             alpha, A.data(), lda, beta, C.data(),
-                                                             ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::herk(main_queue, upper_lower, trans, n, k,
+                                                              alpha, A.data(), lda, beta, C.data(),
+                                                              ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::herk(main_queue, upper_lower, trans, n, k,
-                                                          alpha, A.data(), lda, beta, C.data(), ldc,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::herk(main_queue, upper_lower, trans, n, k,
+                                                           alpha, A.data(), lda, beta, C.data(),
+                                                           ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::herk,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::herk,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, beta,
                                         C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::herk, upper_lower,
-                                        trans, n, k, alpha, A.data(), lda, beta, C.data(), ldc,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk,
+                                        upper_lower, trans, n, k, alpha, A.data(), lda, beta,
+                                        C.data(), ldc, dependencies);
                 break;
             default: break;
         }
@@ -127,7 +127,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -143,23 +143,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class HerkUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(HerkUsmTests, ComplexSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<float>, float>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
 }
 TEST_P(HerkUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -167,23 +167,23 @@ TEST_P(HerkUsmTests, ComplexDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
     EXPECT_TRUEORSKIP((test<std::complex<double>, double>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::conjtrans, 72, 27, 101, 103, alpha, beta)));
 }
 
 INSTANTIATE_TEST_SUITE_P(HerkUsmTestSuite, HerkUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/symm.cpp b/tests/unit_tests/blas/level3/symm.cpp
index 3f6920370..0668adbff 100644
--- a/tests/unit_tests/blas/level3/symm.cpp
+++ b/tests/unit_tests/blas/level3/symm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,17 +48,17 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
          fp beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, C, C_ref;
-    if (left_right == oneapi::mkl::side::left)
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, m, lda);
+    if (left_right == oneapi::math::side::left)
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, m, lda);
     else
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
     C_ref = C;
 
     // Call Reference SYMM.
@@ -96,27 +96,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::symm(main_queue, left_right, upper_lower, m, n,
-                                                      alpha, A_buffer, lda, B_buffer, ldb, beta,
-                                                      C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::symm(main_queue, left_right, upper_lower, m, n,
+                                                       alpha, A_buffer, lda, B_buffer, ldb, beta,
+                                                       C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::symm(main_queue, left_right, upper_lower, m, n, alpha,
-                                                   A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                   ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m, n,
+                                                    alpha, A_buffer, lda, B_buffer, ldb, beta,
+                                                    C_buffer, ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::symm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::symm,
                                         left_right, upper_lower, m, n, alpha, A_buffer, lda,
                                         B_buffer, ldb, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::symm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symm, left_right,
                                         upper_lower, m, n, alpha, A_buffer, lda, B_buffer, ldb,
                                         beta, C_buffer, ldc);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,23 +145,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     return (int)good;
 }
 
-class SymmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SymmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SymmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                  oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                  oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, 101,
                                   102, 103, alpha, beta));
 }
 TEST_P(SymmTests, RealDoublePrecision) {
@@ -170,57 +170,57 @@ TEST_P(SymmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101,
                                    102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower, 72, 27, 101,
-                                   102, 103, alpha, beta));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27,
+                                   101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101,
                                    102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper, 72, 27, 101,
-                                   102, 103, alpha, beta));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27,
+                                   101, 102, 103, alpha, beta));
 }
 TEST_P(SymmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(SymmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(SymmTestSuite, SymmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/symm_usm.cpp b/tests/unit_tests/blas/level3/symm_usm.cpp
index f774e82e3..dc8c00749 100644
--- a/tests/unit_tests/blas/level3/symm_usm.cpp
+++ b/tests/unit_tests/blas/level3/symm_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, int m, int n, int lda, int ldb, int ldc, fp alpha,
          fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
@@ -72,12 +72,12 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     // Prepare data.
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), B(ua), C(ua);
-    if (left_right == oneapi::mkl::side::left)
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, m, m, lda);
+    if (left_right == oneapi::math::side::left)
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, m, m, lda);
     else
-        rand_matrix(A, layout, oneapi::mkl::transpose::nontrans, n, n, lda);
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, m, n, ldc);
+        rand_matrix(A, layout, oneapi::math::transpose::nontrans, n, n, lda);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc);
 
     auto C_ref = C;
 
@@ -95,28 +95,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::symm(main_queue, left_right, upper_lower, m,
-                                                             n, alpha, A.data(), lda, B.data(), ldb,
-                                                             beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::symm(
+                    main_queue, left_right, upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb,
+                    beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::symm(main_queue, left_right, upper_lower, m, n,
-                                                          alpha, A.data(), lda, B.data(), ldb, beta,
-                                                          C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m,
+                                                           n, alpha, A.data(), lda, B.data(), ldb,
+                                                           beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::symm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::symm,
                                         left_right, upper_lower, m, n, alpha, A.data(), lda,
                                         B.data(), ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::symm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symm, left_right,
                                         upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb,
                                         beta, C.data(), ldc, dependencies);
                 break;
@@ -130,7 +130,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -146,22 +146,22 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class SymmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SymmUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                  oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101,
                                   102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                  oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, 101,
                                   102, 103, alpha, beta));
 }
 TEST_P(SymmUsmTests, RealDoublePrecision) {
@@ -170,57 +170,57 @@ TEST_P(SymmUsmTests, RealDoublePrecision) {
     double alpha(2.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower, 72, 27, 101,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101,
                                    102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower, 72, 27, 101,
-                                   102, 103, alpha, beta));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27,
+                                   101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper, 72, 27, 101,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101,
                                    102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper, 72, 27, 101,
-                                   102, 103, alpha, beta));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27,
+                                   101, 102, 103, alpha, beta));
 }
 TEST_P(SymmUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(SymmUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(SymmUsmTestSuite, SymmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/syr2k.cpp b/tests/unit_tests/blas/level3/syr2k.cpp
index 0153e9ec0..0ecb2ad6b 100644
--- a/tests/unit_tests/blas/level3/syr2k.cpp
+++ b/tests/unit_tests/blas/level3/syr2k.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,13 +48,14 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
+         fp beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, C, C_ref;
     rand_matrix(A, layout, trans, n, k, lda);
     rand_matrix(B, layout, trans, n, k, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     C_ref = C;
 
     // Call Reference SYR2K.
@@ -92,27 +93,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k, alpha,
-                                                       A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                       ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k, alpha,
+                                                        A_buffer, lda, B_buffer, ldb, beta,
+                                                        C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k, alpha,
-                                                    A_buffer, lda, B_buffer, ldb, beta, C_buffer,
-                                                    ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k, alpha,
+                                                     A_buffer, lda, B_buffer, ldb, beta, C_buffer,
+                                                     ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr2k,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr2k,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, B_buffer,
                                         ldb, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr2k,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2k,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, B_buffer,
                                         ldb, beta, C_buffer, ldc);
                 break;
@@ -125,7 +126,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,23 +142,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class Syr2kTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
-};
+class Syr2kTests
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Syr2kTests, RealSinglePrecision) {
     float alpha(3.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27,
                                   101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27,
                                   101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kTests, RealDoublePrecision) {
@@ -166,33 +167,33 @@ TEST_P(Syr2kTests, RealDoublePrecision) {
     double alpha(3.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 102, 103, alpha, beta));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73,
+                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 102, 103, alpha, beta));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73,
+                                   27, 101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kTests, ComplexSinglePrecision) {
     std::complex<float> alpha(3.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -200,23 +201,23 @@ TEST_P(Syr2kTests, ComplexDoublePrecision) {
     std::complex<double> alpha(3.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(Syr2kTestSuite, Syr2kTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/syr2k_usm.cpp b/tests/unit_tests/blas/level3/syr2k_usm.cpp
index efa3f07d3..fd51bda52 100644
--- a/tests/unit_tests/blas/level3/syr2k_usm.cpp
+++ b/tests/unit_tests/blas/level3/syr2k_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha,
+         fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -73,7 +74,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     vector<fp, decltype(ua)> A(ua), B(ua), C(ua);
     rand_matrix(A, layout, trans, n, k, lda);
     rand_matrix(B, layout, trans, n, k, ldb);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
 
     auto C_ref = C;
 
@@ -92,28 +93,28 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k,
-                                                              alpha, A.data(), lda, B.data(), ldb,
-                                                              beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k,
+                                                               alpha, A.data(), lda, B.data(), ldb,
+                                                               beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k,
-                                                           alpha, A.data(), lda, B.data(), ldb,
-                                                           beta, C.data(), ldc, dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k,
+                                                            alpha, A.data(), lda, B.data(), ldb,
+                                                            beta, C.data(), ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syr2k,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syr2k,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, B.data(),
                                         ldb, beta, C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syr2k,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2k,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, B.data(),
                                         ldb, beta, C.data(), ldc, dependencies);
                 break;
@@ -127,7 +128,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -143,22 +144,22 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class Syr2kUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(Syr2kUsmTests, RealSinglePrecision) {
     float alpha(3.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27,
                                   101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27,
                                   101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kUsmTests, RealDoublePrecision) {
@@ -167,33 +168,33 @@ TEST_P(Syr2kUsmTests, RealDoublePrecision) {
     double alpha(3.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 102, 103, alpha, beta));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73,
+                                   27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 102, 103, alpha, beta));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73,
+                                   27, 101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(3.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
 }
 TEST_P(Syr2kUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -201,23 +202,23 @@ TEST_P(Syr2kUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(3.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 102, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(Syr2kUsmTestSuite, Syr2kUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/syrk.cpp b/tests/unit_tests/blas/level3/syrk.cpp
index a6b28735d..928aaed84 100644
--- a/tests/unit_tests/blas/level3/syrk.cpp
+++ b/tests/unit_tests/blas/level3/syrk.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,12 +48,12 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldc, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldc, fp alpha, fp beta) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, C, C_ref;
     rand_matrix(A, layout, trans, n, k, lda);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
     C_ref = C;
 
     // Call Reference SYRK.
@@ -90,26 +90,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::syrk(main_queue, upper_lower, trans, n, k, alpha,
-                                                      A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::syrk(main_queue, upper_lower, trans, n, k, alpha,
+                                                       A_buffer, lda, beta, C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::syrk(main_queue, upper_lower, trans, n, k, alpha,
-                                                   A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::syrk(main_queue, upper_lower, trans, n, k, alpha,
+                                                    A_buffer, lda, beta, C_buffer, ldc);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk,
                                         upper_lower, trans, n, k, alpha, A_buffer, lda, beta,
                                         C_buffer, ldc);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk, upper_lower,
-                                        trans, n, k, alpha, A_buffer, lda, beta, C_buffer, ldc);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk,
+                                        upper_lower, trans, n, k, alpha, A_buffer, lda, beta,
+                                        C_buffer, ldc);
                 break;
             default: break;
         }
@@ -120,7 +121,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -136,23 +137,23 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     return (int)good;
 }
 
-class SyrkTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class SyrkTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(SyrkTests, RealSinglePrecision) {
     float alpha(3.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27,
                                   101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27,
                                   101, 103, alpha, beta));
 }
 TEST_P(SyrkTests, RealDoublePrecision) {
@@ -161,33 +162,33 @@ TEST_P(SyrkTests, RealDoublePrecision) {
     double alpha(3.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 103, alpha, beta));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73,
+                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 103, alpha, beta));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73,
+                                   27, 101, 103, alpha, beta));
 }
 TEST_P(SyrkTests, ComplexSinglePrecision) {
     std::complex<float> alpha(3.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
 }
 TEST_P(SyrkTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -195,23 +196,23 @@ TEST_P(SyrkTests, ComplexDoublePrecision) {
     std::complex<double> alpha(3.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(SyrkTestSuite, SyrkTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/syrk_usm.cpp b/tests/unit_tests/blas/level3/syrk_usm.cpp
index e5569eb78..2771dd2b7 100644
--- a/tests/unit_tests/blas/level3/syrk_usm.cpp
+++ b/tests/unit_tests/blas/level3/syrk_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,8 +47,8 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
-         oneapi::mkl::transpose trans, int n, int k, int lda, int ldc, fp alpha, fp beta) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower,
+         oneapi::math::transpose trans, int n, int k, int lda, int ldc, fp alpha, fp beta) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,7 +72,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), C(ua);
     rand_matrix(A, layout, trans, n, k, lda);
-    rand_matrix(C, layout, oneapi::mkl::transpose::nontrans, n, n, ldc);
+    rand_matrix(C, layout, oneapi::math::transpose::nontrans, n, n, ldc);
 
     auto C_ref = C;
 
@@ -90,30 +90,30 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::syrk(main_queue, upper_lower, trans, n, k,
-                                                             alpha, A.data(), lda, beta, C.data(),
-                                                             ldc, dependencies);
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::syrk(main_queue, upper_lower, trans, n, k,
+                                                              alpha, A.data(), lda, beta, C.data(),
+                                                              ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::syrk(main_queue, upper_lower, trans, n, k,
-                                                          alpha, A.data(), lda, beta, C.data(), ldc,
-                                                          dependencies);
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::syrk(main_queue, upper_lower, trans, n, k,
+                                                           alpha, A.data(), lda, beta, C.data(),
+                                                           ldc, dependencies);
                 break;
             default: break;
         }
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk,
                                         upper_lower, trans, n, k, alpha, A.data(), lda, beta,
                                         C.data(), ldc, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk, upper_lower,
-                                        trans, n, k, alpha, A.data(), lda, beta, C.data(), ldc,
-                                        dependencies);
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk,
+                                        upper_lower, trans, n, k, alpha, A.data(), lda, beta,
+                                        C.data(), ldc, dependencies);
                 break;
             default: break;
         }
@@ -125,7 +125,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -141,22 +141,22 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower,
 }
 
 class SyrkUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(SyrkUsmTests, RealSinglePrecision) {
     float alpha(3.0);
     float beta(3.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27,
                                   101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
+                                  oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27,
                                   101, 103, alpha, beta));
 }
 TEST_P(SyrkUsmTests, RealDoublePrecision) {
@@ -165,33 +165,33 @@ TEST_P(SyrkUsmTests, RealDoublePrecision) {
     double alpha(3.0);
     double beta(3.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, 73,
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73,
                                    27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 103, alpha, beta));
+                                   oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73,
+                                   27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, 73, 27,
-                                   101, 103, alpha, beta));
+                                   oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73,
+                                   27, 101, 103, alpha, beta));
 }
 TEST_P(SyrkUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(3.0, -0.5);
     std::complex<float> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
 }
 TEST_P(SyrkUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
@@ -199,23 +199,23 @@ TEST_P(SyrkUsmTests, ComplexDoublePrecision) {
     std::complex<double> alpha(3.0, -0.5);
     std::complex<double> beta(3.0, -1.5);
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::lower,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::uplo::upper,
-        oneapi::mkl::transpose::trans, 73, 27, 101, 103, alpha, beta));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper,
+        oneapi::math::transpose::trans, 73, 27, 101, 103, alpha, beta));
 }
 
 INSTANTIATE_TEST_SUITE_P(SyrkUsmTestSuite, SyrkUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/trmm.cpp b/tests/unit_tests/blas/level3/trmm.cpp
index 2a02aa0d1..86d127cce 100644
--- a/tests/unit_tests/blas/level3/trmm.cpp
+++ b/tests/unit_tests/blas/level3/trmm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,17 +48,17 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-         oneapi::mkl::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+         oneapi::math::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, B_ref;
-    if (left_right == oneapi::mkl::side::right)
+    if (left_right == oneapi::math::side::right)
         rand_matrix(A, layout, transa, n, n, lda);
     else
         rand_matrix(A, layout, transa, m, m, lda);
 
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
     B_ref = B;
 
     // Call Reference TRMM.
@@ -96,27 +96,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::trmm(main_queue, left_right, upper_lower, transa,
-                                                      unit_nonunit, m, n, alpha, A_buffer, lda,
-                                                      B_buffer, ldb);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::trmm(main_queue, left_right, upper_lower, transa,
+                                                       unit_nonunit, m, n, alpha, A_buffer, lda,
+                                                       B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::trmm(main_queue, left_right, upper_lower, transa,
-                                                   unit_nonunit, m, n, alpha, A_buffer, lda,
-                                                   B_buffer, ldb);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::trmm(main_queue, left_right, upper_lower, transa,
+                                                    unit_nonunit, m, n, alpha, A_buffer, lda,
+                                                    B_buffer, ldb);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trmm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trmm,
                                         left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                                         A_buffer, lda, B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trmm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmm, left_right,
                                         upper_lower, transa, unit_nonunit, m, n, alpha, A_buffer,
                                         lda, B_buffer, ldb);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,222 +145,222 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     return (int)good;
 }
 
-class TrmmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TrmmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TrmmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                   27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                   27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
 }
 TEST_P(TrmmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
 }
 TEST_P(TrmmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 TEST_P(TrmmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrmmTestSuite, TrmmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/trmm_usm.cpp b/tests/unit_tests/blas/level3/trmm_usm.cpp
index 1fa9bbdb0..20469d752 100644
--- a/tests/unit_tests/blas/level3/trmm_usm.cpp
+++ b/tests/unit_tests/blas/level3/trmm_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,9 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-         oneapi::mkl::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+         oneapi::math::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,12 +72,12 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     // Prepare data.
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), B(ua);
-    if (left_right == oneapi::mkl::side::right)
+    if (left_right == oneapi::math::side::right)
         rand_matrix(A, layout, transa, n, n, lda);
     else
         rand_matrix(A, layout, transa, m, m, lda);
 
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
 
     auto B_ref = B;
 
@@ -97,13 +97,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trmm(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trmm(
                     main_queue, left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                     A.data(), lda, B.data(), ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trmm(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trmm(
                     main_queue, left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                     A.data(), lda, B.data(), ldb, dependencies);
                 break;
@@ -112,13 +112,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trmm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trmm,
                                         left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                                         A.data(), lda, B.data(), ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trmm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmm, left_right,
                                         upper_lower, transa, unit_nonunit, m, n, alpha, A.data(),
                                         lda, B.data(), ldb, dependencies);
                 break;
@@ -132,7 +132,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -148,221 +148,221 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class TrmmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrmmUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                   27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                   27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
 }
 TEST_P(TrmmUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
 }
 TEST_P(TrmmUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 TEST_P(TrmmUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-        27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrmmUsmTestSuite, TrmmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/trsm.cpp b/tests/unit_tests/blas/level3/trsm.cpp
index 90b8d5c93..c10c4c2a5 100644
--- a/tests/unit_tests/blas/level3/trsm.cpp
+++ b/tests/unit_tests/blas/level3/trsm.cpp
@@ -31,9 +31,9 @@
 #endif
 #include "allocator_helper.hpp"
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -48,17 +48,17 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-         oneapi::mkl::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+         oneapi::math::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
     // Prepare data.
     vector<fp, allocator_helper<fp, 64>> A, B, B_ref;
-    if (left_right == oneapi::mkl::side::right)
+    if (left_right == oneapi::math::side::right)
         rand_trsm_matrix(A, layout, transa, n, n, lda);
     else
         rand_trsm_matrix(A, layout, transa, m, m, lda);
 
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
     B_ref = B;
 
     // Call Reference TRSM.
@@ -96,27 +96,27 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                oneapi::mkl::blas::column_major::trsm(main_queue, left_right, upper_lower, transa,
-                                                      unit_nonunit, m, n, alpha, A_buffer, lda,
-                                                      B_buffer, ldb);
+            case oneapi::math::layout::col_major:
+                oneapi::math::blas::column_major::trsm(main_queue, left_right, upper_lower, transa,
+                                                       unit_nonunit, m, n, alpha, A_buffer, lda,
+                                                       B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                oneapi::mkl::blas::row_major::trsm(main_queue, left_right, upper_lower, transa,
-                                                   unit_nonunit, m, n, alpha, A_buffer, lda,
-                                                   B_buffer, ldb);
+            case oneapi::math::layout::row_major:
+                oneapi::math::blas::row_major::trsm(main_queue, left_right, upper_lower, transa,
+                                                    unit_nonunit, m, n, alpha, A_buffer, lda,
+                                                    B_buffer, ldb);
                 break;
             default: break;
         }
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm,
                                         left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                                         A_buffer, lda, B_buffer, ldb);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm, left_right,
                                         upper_lower, transa, unit_nonunit, m, n, alpha, A_buffer,
                                         lda, B_buffer, ldb);
                 break;
@@ -129,7 +129,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -145,350 +145,350 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     return (int)good;
 }
 
-class TrsmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {
+class TrsmTests : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {
 };
 
 TEST_P(TrsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
 }
 TEST_P(TrsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
 }
 TEST_P(TrsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::trans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
+        101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::trans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
+        101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::conjtrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::conjtrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 TEST_P(TrsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrsmTestSuite, TrsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/blas/level3/trsm_usm.cpp b/tests/unit_tests/blas/level3/trsm_usm.cpp
index f84b0ed61..0f2247a14 100644
--- a/tests/unit_tests/blas/level3/trsm_usm.cpp
+++ b/tests/unit_tests/blas/level3/trsm_usm.cpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 #include "cblas.h"
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "onemkl_blas_helper.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "onemath_blas_helper.hpp"
 #include "reference_blas_templates.hpp"
 #include "test_common.hpp"
 #include "test_helper.hpp"
@@ -47,9 +47,9 @@ extern std::vector<sycl::device*> devices;
 namespace {
 
 template <typename fp>
-int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
-         oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa,
-         oneapi::mkl::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
+int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right,
+         oneapi::math::uplo upper_lower, oneapi::math::transpose transa,
+         oneapi::math::diag unit_nonunit, int m, int n, int lda, int ldb, fp alpha) {
     // Catch asynchronous exceptions.
     auto exception_handler = [](exception_list exceptions) {
         for (std::exception_ptr const& e : exceptions) {
@@ -72,12 +72,12 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     // Prepare data.
     auto ua = usm_allocator<fp, usm::alloc::shared, 64>(cxt, *dev);
     vector<fp, decltype(ua)> A(ua), B(ua);
-    if (left_right == oneapi::mkl::side::right)
+    if (left_right == oneapi::math::side::right)
         rand_trsm_matrix(A, layout, transa, n, n, lda);
     else
         rand_trsm_matrix(A, layout, transa, m, m, lda);
 
-    rand_matrix(B, layout, oneapi::mkl::transpose::nontrans, m, n, ldb);
+    rand_matrix(B, layout, oneapi::math::transpose::nontrans, m, n, ldb);
 
     auto B_ref = B;
 
@@ -97,13 +97,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
     try {
 #ifdef CALL_RT_API
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                done = oneapi::mkl::blas::column_major::trsm(
+            case oneapi::math::layout::col_major:
+                done = oneapi::math::blas::column_major::trsm(
                     main_queue, left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                     A.data(), lda, B.data(), ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                done = oneapi::mkl::blas::row_major::trsm(
+            case oneapi::math::layout::row_major:
+                done = oneapi::math::blas::row_major::trsm(
                     main_queue, left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                     A.data(), lda, B.data(), ldb, dependencies);
                 break;
@@ -112,13 +112,13 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         done.wait();
 #else
         switch (layout) {
-            case oneapi::mkl::layout::col_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm,
+            case oneapi::math::layout::col_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm,
                                         left_right, upper_lower, transa, unit_nonunit, m, n, alpha,
                                         A.data(), lda, B.data(), ldb, dependencies);
                 break;
-            case oneapi::mkl::layout::row_major:
-                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm, left_right,
+            case oneapi::math::layout::row_major:
+                TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm, left_right,
                                         upper_lower, transa, unit_nonunit, m, n, alpha, A.data(),
                                         lda, B.data(), ldb, dependencies);
                 break;
@@ -132,7 +132,7 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
         print_error_code(e);
     }
 
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         return test_skipped;
     }
 
@@ -149,349 +149,349 @@ int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right,
 }
 
 class TrsmUsmTests
-        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::mkl::layout>> {};
+        : public ::testing::TestWithParam<std::tuple<sycl::device*, oneapi::math::layout>> {};
 
 TEST_P(TrsmUsmTests, RealSinglePrecision) {
     float alpha(2.0);
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                   101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                  27, 101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                  72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::lower,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::left, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<float>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                  oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                  oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-                                  101, 102, alpha));
+                                  oneapi::math::side::right, oneapi::math::uplo::upper,
+                                  oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+                                  27, 101, 102, alpha));
 }
 TEST_P(TrsmUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     double alpha(2.0);
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::unit, 72, 27,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
                                    101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
-                                   27, 101, 102, alpha));
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+                                   72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::lower,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::left, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<double>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                   oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                   oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72,
+                                   oneapi::math::side::right, oneapi::math::uplo::upper,
+                                   oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
                                    27, 101, 102, alpha));
 }
 TEST_P(TrsmUsmTests, ComplexSinglePrecision) {
     std::complex<float> alpha(2.0, -0.5);
     EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::nontrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::trans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::transpose::conjtrans,
-                                                oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::nontrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::trans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
+        101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::trans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
+        101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::lower,
+                                                oneapi::math::transpose::conjtrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
+                                                oneapi::math::side::left, oneapi::math::uplo::upper,
+                                                oneapi::math::transpose::conjtrans,
+                                                oneapi::math::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
-        101, 102, alpha));
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<float>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<float>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 TEST_P(TrsmUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam()));
 
     std::complex<double> alpha(2.0, -0.5);
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::nontrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::trans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::lower,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::left, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
-    EXPECT_TRUEORSKIP(test<std::complex<double>>(std::get<0>(GetParam()), std::get<1>(GetParam()),
-                                                 oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                                                 oneapi::mkl::transpose::conjtrans,
-                                                 oneapi::mkl::diag::unit, 72, 27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::trans, oneapi::mkl::diag::nonunit, 72, 27,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27,
         101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::lower, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::left,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
     EXPECT_TRUEORSKIP(test<std::complex<double>>(
-        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::mkl::side::right,
-        oneapi::mkl::uplo::upper, oneapi::mkl::transpose::conjtrans, oneapi::mkl::diag::nonunit, 72,
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
         27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72,
+        27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
+    EXPECT_TRUEORSKIP(test<std::complex<double>>(
+        std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right,
+        oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit,
+        72, 27, 101, 102, alpha));
 }
 
 INSTANTIATE_TEST_SUITE_P(TrsmUsmTestSuite, TrsmUsmTests,
                          ::testing::Combine(testing::ValuesIn(devices),
-                                            testing::Values(oneapi::mkl::layout::col_major,
-                                                            oneapi::mkl::layout::row_major)),
+                                            testing::Values(oneapi::math::layout::col_major,
+                                                            oneapi::math::layout::row_major)),
                          ::LayoutDeviceNamePrint());
 
 } // anonymous namespace
diff --git a/tests/unit_tests/dft/include/compute_inplace.hpp b/tests/unit_tests/dft/include/compute_inplace.hpp
index 95421a232..bd11ed9d4 100644
--- a/tests/unit_tests/dft/include/compute_inplace.hpp
+++ b/tests/unit_tests/dft/include/compute_inplace.hpp
@@ -17,13 +17,13 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_COMPUTE_INPLACE_HPP
-#define ONEMKL_COMPUTE_INPLACE_HPP
+#ifndef ONEMATH_COMPUTE_INPLACE_HPP
+#define ONEMATH_COMPUTE_INPLACE_HPP
 
 #include "compute_tester.hpp"
-#include <oneapi/mkl/exceptions.hpp>
+#include <oneapi/math/exceptions.hpp>
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_in_place_buffer() {
     if (!init(MemoryAccessModel::buffer)) {
         return test_skipped;
@@ -31,7 +31,7 @@ int DFT_Test<precision, domain>::test_in_place_buffer() {
 
     auto modified_strides_fwd = this->strides_fwd;
     auto modified_strides_bwd = this->strides_bwd;
-    if (domain == oneapi::mkl::dft::domain::REAL) {
+    if (domain == oneapi::math::dft::domain::REAL) {
         // both input and output strides must be set
         auto default_conjuate_strides = get_conjugate_even_complex_strides(sizes);
         std::ptrdiff_t rank = static_cast<std::ptrdiff_t>(sizes.size());
@@ -57,30 +57,30 @@ int DFT_Test<precision, domain>::test_in_place_buffer() {
     auto ref_distance = std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<>());
 
     descriptor_t descriptor{ sizes };
-    descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                         oneapi::mkl::dft::config_value::INPLACE);
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
-    }
-    descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-    descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_distance);
-    descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, backward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                         oneapi::math::dft::config_value::INPLACE);
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
+    }
+    descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+    descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, backward_distance);
     if (modified_strides_fwd.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES,
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES,
                              modified_strides_fwd.data());
     }
     if (modified_strides_bwd.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES,
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES,
                              modified_strides_bwd.data());
     }
     commit_descriptor(descriptor, sycl_queue);
 
     std::vector<FwdInputType> inout_host(
         strided_copy(input, sizes, modified_strides_fwd, batches, forward_distance));
-    int real_multiplier = (domain == oneapi::mkl::dft::domain::REAL ? 2 : 1);
+    int real_multiplier = (domain == oneapi::math::dft::domain::REAL ? 2 : 1);
     inout_host.resize(
         cast_unsigned(std::max(forward_distance, real_multiplier * backward_distance) * batches +
                       get_default(modified_strides_bwd, 0, 0L) * real_multiplier));
@@ -88,14 +88,14 @@ int DFT_Test<precision, domain>::test_in_place_buffer() {
     {
         sycl::buffer<FwdInputType, 1> inout_buf{ inout_host };
 
-        oneapi::mkl::dft::compute_forward<descriptor_t, FwdInputType>(descriptor, inout_buf);
+        oneapi::math::dft::compute_forward<descriptor_t, FwdInputType>(descriptor, inout_buf);
 
         {
             auto acc_host = inout_buf.get_host_access();
             auto ptr_host = reinterpret_cast<FwdOutputType*>(acc_host.get_pointer());
             for (std::int64_t i = 0; i < batches; i++) {
                 EXPECT_TRUE(check_equal_strided < domain ==
-                            oneapi::mkl::dft::domain::REAL >
+                            oneapi::math::dft::domain::REAL >
                                 (ptr_host + backward_distance * i,
                                  out_host_ref.data() + ref_distance * i, sizes,
                                  modified_strides_bwd, abs_error_margin, rel_error_margin,
@@ -103,8 +103,8 @@ int DFT_Test<precision, domain>::test_in_place_buffer() {
             }
         }
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           FwdInputType>(descriptor, inout_buf);
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            FwdInputType>(descriptor, inout_buf);
     }
 
     std::vector<FwdInputType> fwd_data_ref = input;
@@ -121,7 +121,7 @@ int DFT_Test<precision, domain>::test_in_place_buffer() {
     return !::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_in_place_USM() {
     if (!init(MemoryAccessModel::usm)) {
         return test_skipped;
@@ -129,7 +129,7 @@ int DFT_Test<precision, domain>::test_in_place_USM() {
 
     auto modified_strides_fwd = this->strides_fwd;
     auto modified_strides_bwd = this->strides_bwd;
-    if (domain == oneapi::mkl::dft::domain::REAL) {
+    if (domain == oneapi::math::dft::domain::REAL) {
         // both input and output strides must be set
         auto default_conjuate_strides = get_conjugate_even_complex_strides(sizes);
         std::ptrdiff_t rank = static_cast<std::ptrdiff_t>(sizes.size());
@@ -155,23 +155,23 @@ int DFT_Test<precision, domain>::test_in_place_USM() {
     auto ref_distance = std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<>());
 
     descriptor_t descriptor = { sizes };
-    descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                         oneapi::mkl::dft::config_value::INPLACE);
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
-    }
-    descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-    descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_distance);
-    descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, backward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                         oneapi::math::dft::config_value::INPLACE);
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
+    }
+    descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+    descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, backward_distance);
     if (modified_strides_fwd.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES,
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES,
                              modified_strides_fwd.data());
     }
     if (modified_strides_bwd.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES,
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES,
                              modified_strides_bwd.data());
     }
     commit_descriptor(descriptor, sycl_queue);
@@ -180,27 +180,28 @@ int DFT_Test<precision, domain>::test_in_place_USM() {
     std::vector<FwdInputType, decltype(ua_input)> inout(
         strided_copy(input, sizes, modified_strides_fwd, batches, forward_distance, ua_input),
         ua_input);
-    int real_multiplier = (domain == oneapi::mkl::dft::domain::REAL ? 2 : 1);
+    int real_multiplier = (domain == oneapi::math::dft::domain::REAL ? 2 : 1);
     inout.resize(
         cast_unsigned(std::max(forward_distance, real_multiplier * backward_distance) * batches +
                       real_multiplier * get_default(modified_strides_bwd, 0, 0L)));
 
     std::vector<sycl::event> no_dependencies;
-    oneapi::mkl::dft::compute_forward<descriptor_t, FwdInputType>(descriptor, inout.data(),
-                                                                  no_dependencies)
+    oneapi::math::dft::compute_forward<descriptor_t, FwdInputType>(descriptor, inout.data(),
+                                                                   no_dependencies)
         .wait_and_throw();
 
     for (std::int64_t i = 0; i < batches; i++) {
         EXPECT_TRUE(check_equal_strided < domain ==
-                    oneapi::mkl::dft::domain::REAL >
+                    oneapi::math::dft::domain::REAL >
                         (reinterpret_cast<FwdOutputType*>(inout.data()) + backward_distance * i,
                          out_host_ref.data() + ref_distance * i, sizes, modified_strides_bwd,
                          abs_error_margin, rel_error_margin, std::cout));
     }
 
     sycl::event done =
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           FwdInputType>(descriptor, inout.data(), no_dependencies);
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            FwdInputType>(descriptor, inout.data(),
+                                                          no_dependencies);
     done.wait_and_throw();
 
     std::for_each(input.begin(), input.end(),
@@ -215,4 +216,4 @@ int DFT_Test<precision, domain>::test_in_place_USM() {
     return !::testing::Test::HasFailure();
 }
 
-#endif //ONEMKL_COMPUTE_INPLACE_HPP
+#endif //ONEMATH_COMPUTE_INPLACE_HPP
diff --git a/tests/unit_tests/dft/include/compute_inplace_real_real.hpp b/tests/unit_tests/dft/include/compute_inplace_real_real.hpp
index d4af1a44a..263cc622f 100644
--- a/tests/unit_tests/dft/include/compute_inplace_real_real.hpp
+++ b/tests/unit_tests/dft/include/compute_inplace_real_real.hpp
@@ -17,17 +17,17 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_COMPUTE_INPLACE_REAL_REAL_HPP
-#define ONEMKL_COMPUTE_INPLACE_REAL_REAL_HPP
+#ifndef ONEMATH_COMPUTE_INPLACE_REAL_REAL_HPP
+#define ONEMATH_COMPUTE_INPLACE_REAL_REAL_HPP
 
 #include "compute_tester.hpp"
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_in_place_real_real_USM() {
     if (!init(MemoryAccessModel::usm)) {
         return test_skipped;
     }
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
         std::cout << "skipping real split tests as they are not supported" << std::endl;
 
         return test_skipped;
@@ -35,14 +35,14 @@ int DFT_Test<precision, domain>::test_in_place_real_real_USM() {
     else {
         descriptor_t descriptor{ sizes };
         PrecisionType backward_scale = 1.f / static_cast<PrecisionType>(forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::INPLACE);
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
-        descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE, backward_scale);
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::INPLACE);
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
+        descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BACKWARD_SCALE, backward_scale);
 
         commit_descriptor(descriptor, sycl_queue);
 
@@ -54,7 +54,7 @@ int DFT_Test<precision, domain>::test_in_place_real_real_USM() {
         std::copy(input_im.begin(), input_im.end(), inout_im.begin());
 
         std::vector<sycl::event> no_dependencies;
-        oneapi::mkl::dft::compute_forward<descriptor_t, PrecisionType>(
+        oneapi::math::dft::compute_forward<descriptor_t, PrecisionType>(
             descriptor, inout_re.data(), inout_im.data(), no_dependencies)
             .wait_and_throw();
 
@@ -65,9 +65,9 @@ int DFT_Test<precision, domain>::test_in_place_real_real_USM() {
         EXPECT_TRUE(check_equal_vector(output_data.data(), out_host_ref.data(), output_data.size(),
                                        abs_error_margin, rel_error_margin, std::cout));
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           PrecisionType>(descriptor, inout_re.data(),
-                                                          inout_im.data(), no_dependencies)
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            PrecisionType>(descriptor, inout_re.data(),
+                                                           inout_im.data(), no_dependencies)
             .wait_and_throw();
 
         for (std::size_t i = 0; i < output_data.size(); ++i) {
@@ -81,13 +81,13 @@ int DFT_Test<precision, domain>::test_in_place_real_real_USM() {
     }
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_in_place_real_real_buffer() {
     if (!init(MemoryAccessModel::buffer)) {
         return test_skipped;
     }
 
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
         std::cout << "skipping real split tests as they are not supported" << std::endl;
 
         return test_skipped;
@@ -96,14 +96,14 @@ int DFT_Test<precision, domain>::test_in_place_real_real_buffer() {
         descriptor_t descriptor{ sizes };
 
         PrecisionType backward_scale = 1.f / static_cast<PrecisionType>(forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::INPLACE);
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
-        descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE, backward_scale);
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::INPLACE);
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
+        descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BACKWARD_SCALE, backward_scale);
 
         commit_descriptor(descriptor, sycl_queue);
 
@@ -117,8 +117,8 @@ int DFT_Test<precision, domain>::test_in_place_real_real_buffer() {
         sycl::buffer<PrecisionType, 1> inout_im_buf{ host_inout_im.data(),
                                                      sycl::range<1>(size_total) };
 
-        oneapi::mkl::dft::compute_forward<descriptor_t, PrecisionType>(descriptor, inout_re_buf,
-                                                                       inout_im_buf);
+        oneapi::math::dft::compute_forward<descriptor_t, PrecisionType>(descriptor, inout_re_buf,
+                                                                        inout_im_buf);
 
         {
             auto acc_inout_re = inout_re_buf.get_host_access();
@@ -132,8 +132,8 @@ int DFT_Test<precision, domain>::test_in_place_real_real_buffer() {
                                            std::cout));
         }
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           PrecisionType>(descriptor, inout_re_buf, inout_im_buf);
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            PrecisionType>(descriptor, inout_re_buf, inout_im_buf);
 
         {
             auto acc_inout_re = inout_re_buf.get_host_access();
@@ -149,4 +149,4 @@ int DFT_Test<precision, domain>::test_in_place_real_real_buffer() {
     }
 }
 
-#endif //ONEMKL_COMPUTE_INPLACE_REAL_REAL_HPP
+#endif //ONEMATH_COMPUTE_INPLACE_REAL_REAL_HPP
diff --git a/tests/unit_tests/dft/include/compute_out_of_place.hpp b/tests/unit_tests/dft/include/compute_out_of_place.hpp
index 0d2041dc1..284a68e80 100644
--- a/tests/unit_tests/dft/include/compute_out_of_place.hpp
+++ b/tests/unit_tests/dft/include/compute_out_of_place.hpp
@@ -17,13 +17,13 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_COMPUTE_OUT_OF_PLACE_HPP
-#define ONEMKL_COMPUTE_OUT_OF_PLACE_HPP
+#ifndef ONEMATH_COMPUTE_OUT_OF_PLACE_HPP
+#define ONEMATH_COMPUTE_OUT_OF_PLACE_HPP
 
 #include "compute_tester.hpp"
 #include <numeric>
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_out_of_place_buffer() {
     if (!init(MemoryAccessModel::buffer)) {
         return test_skipped;
@@ -33,33 +33,33 @@ int DFT_Test<precision, domain>::test_out_of_place_buffer() {
     auto strides_fwd_cpy = strides_fwd;
     auto strides_bwd_cpy = strides_bwd;
     if (strides_fwd_cpy.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
     }
     else {
         strides_fwd_cpy.resize(sizes.size() + 1);
-        descriptor.get_value(oneapi::mkl::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
+        descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
     }
     if (strides_bwd_cpy.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
     }
     else {
         strides_bwd_cpy.resize(sizes.size() + 1);
-        descriptor.get_value(oneapi::mkl::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
+        descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
     }
     auto [forward_distance, backward_distance] =
         get_default_distances<domain>(sizes, strides_fwd_cpy, strides_bwd_cpy);
     auto ref_distance = std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<>());
-    descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                         oneapi::mkl::dft::config_value::NOT_INPLACE);
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
-    }
-    descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-    descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_distance);
-    descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, backward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                         oneapi::math::dft::config_value::NOT_INPLACE);
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
+    }
+    descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+    descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, backward_distance);
     commit_descriptor(descriptor, sycl_queue);
     std::vector<FwdInputType> fwd_data(
         strided_copy(input, sizes, strides_fwd_cpy, batches, forward_distance));
@@ -70,7 +70,7 @@ int DFT_Test<precision, domain>::test_out_of_place_buffer() {
         sycl::buffer<FwdInputType, 1> fwd_buf{ fwd_data };
         sycl::buffer<FwdOutputType, 1> bwd_buf{ tmp };
 
-        oneapi::mkl::dft::compute_forward<descriptor_t, FwdInputType, FwdOutputType>(
+        oneapi::math::dft::compute_forward<descriptor_t, FwdInputType, FwdOutputType>(
             descriptor, fwd_buf, bwd_buf);
 
         {
@@ -78,16 +78,16 @@ int DFT_Test<precision, domain>::test_out_of_place_buffer() {
             auto bwd_ptr = acc_bwd.get_pointer();
             for (std::int64_t i = 0; i < batches; i++) {
                 EXPECT_TRUE(check_equal_strided < domain ==
-                            oneapi::mkl::dft::domain::REAL >
+                            oneapi::math::dft::domain::REAL >
                                 (bwd_ptr + backward_distance * i,
                                  out_host_ref.data() + ref_distance * i, sizes, strides_bwd_cpy,
                                  abs_error_margin, rel_error_margin, std::cout));
             }
         }
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           FwdOutputType, FwdInputType>(descriptor, bwd_buf,
-                                                                        fwd_buf);
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            FwdOutputType, FwdInputType>(descriptor, bwd_buf,
+                                                                         fwd_buf);
     }
 
     // account for scaling that occurs during DFT
@@ -103,7 +103,7 @@ int DFT_Test<precision, domain>::test_out_of_place_buffer() {
     return !::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_out_of_place_USM() {
     if (!init(MemoryAccessModel::usm)) {
         return test_skipped;
@@ -114,41 +114,41 @@ int DFT_Test<precision, domain>::test_out_of_place_USM() {
     auto strides_fwd_cpy = strides_fwd;
     auto strides_bwd_cpy = strides_bwd;
     if (strides_fwd_cpy.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
     }
     else {
         strides_fwd_cpy.resize(sizes.size() + 1);
-        descriptor.get_value(oneapi::mkl::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
+        descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, strides_fwd_cpy.data());
     }
     if (strides_bwd_cpy.size()) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
     }
     else {
         strides_bwd_cpy.resize(sizes.size() + 1);
-        descriptor.get_value(oneapi::mkl::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
+        descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, strides_bwd_cpy.data());
     }
     auto [forward_distance, backward_distance] =
         get_default_distances<domain>(sizes, strides_fwd_cpy, strides_bwd_cpy);
     auto ref_distance = std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<>());
-    descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                         oneapi::mkl::dft::config_value::NOT_INPLACE);
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
-    }
-    descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                         oneapi::mkl::dft::config_value::NOT_INPLACE);
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
-    }
-    descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-    descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_distance);
-    descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, backward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                         oneapi::math::dft::config_value::NOT_INPLACE);
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
+    }
+    descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                         oneapi::math::dft::config_value::NOT_INPLACE);
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
+    }
+    descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+    descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_distance);
+    descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, backward_distance);
     commit_descriptor(descriptor, sycl_queue);
 
     auto ua_input = usm_allocator_t<FwdInputType>(cxt, *dev);
@@ -160,21 +160,21 @@ int DFT_Test<precision, domain>::test_out_of_place_USM() {
         cast_unsigned(backward_distance * batches + get_default(strides_bwd_cpy, 0, 0L)),
         ua_output);
 
-    oneapi::mkl::dft::compute_forward<descriptor_t, FwdInputType, FwdOutputType>(
+    oneapi::math::dft::compute_forward<descriptor_t, FwdInputType, FwdOutputType>(
         descriptor, fwd.data(), bwd.data(), no_dependencies)
         .wait_and_throw();
 
     auto bwd_ptr = &bwd[0];
     for (std::int64_t i = 0; i < batches; i++) {
         EXPECT_TRUE(check_equal_strided < domain ==
-                    oneapi::mkl::dft::domain::REAL >
+                    oneapi::math::dft::domain::REAL >
                         (bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i,
                          sizes, strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout));
     }
 
-    oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>, FwdOutputType,
-                                       FwdInputType>(descriptor, bwd.data(), fwd.data(),
-                                                     no_dependencies)
+    oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                        FwdOutputType, FwdInputType>(descriptor, bwd.data(),
+                                                                     fwd.data(), no_dependencies)
         .wait_and_throw();
 
     // account for scaling that occurs during DFT
@@ -190,4 +190,4 @@ int DFT_Test<precision, domain>::test_out_of_place_USM() {
     return !::testing::Test::HasFailure();
 }
 
-#endif //ONEMKL_COMPUTE_OUT_OF_PLACE_HPP
+#endif //ONEMATH_COMPUTE_OUT_OF_PLACE_HPP
diff --git a/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp b/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp
index fb3ecb4f2..9f756f5cf 100644
--- a/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp
+++ b/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp
@@ -17,18 +17,18 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
-#define ONEMKL_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
+#ifndef ONEMATH_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
+#define ONEMATH_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
 
 #include "compute_tester.hpp"
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_out_of_place_real_real_USM() {
     if (!init(MemoryAccessModel::usm)) {
         return test_skipped;
     }
 
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
         std::cout << "skipping real split tests as they are not supported" << std::endl;
 
         return test_skipped;
@@ -37,14 +37,14 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_USM() {
         descriptor_t descriptor{ sizes };
 
         PrecisionType backward_scale = 1.f / static_cast<PrecisionType>(forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::NOT_INPLACE);
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
-        descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE, backward_scale);
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::NOT_INPLACE);
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
+        descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BACKWARD_SCALE, backward_scale);
 
         commit_descriptor(descriptor, sycl_queue);
 
@@ -63,7 +63,7 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_USM() {
 
         std::vector<sycl::event> no_dependencies;
 
-        oneapi::mkl::dft::compute_forward<descriptor_t, PrecisionType, PrecisionType>(
+        oneapi::math::dft::compute_forward<descriptor_t, PrecisionType, PrecisionType>(
             descriptor, in_re.data(), in_im.data(), out_re.data(), out_im.data(), no_dependencies)
             .wait_and_throw();
         std::vector<FwdOutputType> output_data(size_total);
@@ -73,8 +73,8 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_USM() {
         EXPECT_TRUE(check_equal_vector(output_data.data(), out_host_ref.data(), output_data.size(),
                                        abs_error_margin, rel_error_margin, std::cout));
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           PrecisionType, PrecisionType>(
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            PrecisionType, PrecisionType>(
             descriptor, out_re.data(), out_im.data(), out_back_re.data(), out_back_im.data(),
             no_dependencies)
             .wait_and_throw();
@@ -90,13 +90,13 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_USM() {
     return !::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int DFT_Test<precision, domain>::test_out_of_place_real_real_buffer() {
     if (!init(MemoryAccessModel::buffer)) {
         return test_skipped;
     }
 
-    if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
+    if constexpr (domain == oneapi::math::dft::domain::REAL) {
         std::cout << "skipping real split tests as they are not supported" << std::endl;
 
         return test_skipped;
@@ -105,14 +105,14 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_buffer() {
         descriptor_t descriptor{ sizes };
 
         PrecisionType backward_scale = 1.f / static_cast<PrecisionType>(forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::NOT_INPLACE);
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
-        descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, forward_elements);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE, backward_scale);
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::NOT_INPLACE);
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
+        descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS, batches);
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, forward_elements);
+        descriptor.set_value(oneapi::math::dft::config_param::BACKWARD_SCALE, backward_scale);
 
         commit_descriptor(descriptor, sycl_queue);
 
@@ -123,7 +123,7 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_buffer() {
         sycl::buffer<PrecisionType, 1> out_back_dev_re{ sycl::range<1>(size_total) };
         sycl::buffer<PrecisionType, 1> out_back_dev_im{ sycl::range<1>(size_total) };
 
-        oneapi::mkl::dft::compute_forward<descriptor_t, PrecisionType, PrecisionType>(
+        oneapi::math::dft::compute_forward<descriptor_t, PrecisionType, PrecisionType>(
             descriptor, in_dev_re, in_dev_im, out_dev_re, out_dev_im);
 
         {
@@ -138,8 +138,8 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_buffer() {
                                            std::cout));
         }
 
-        oneapi::mkl::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
-                                           PrecisionType, PrecisionType>(
+        oneapi::math::dft::compute_backward<std::remove_reference_t<decltype(descriptor)>,
+                                            PrecisionType, PrecisionType>(
             descriptor, out_dev_re, out_dev_im, out_back_dev_re, out_back_dev_im);
 
         {
@@ -157,4 +157,4 @@ int DFT_Test<precision, domain>::test_out_of_place_real_real_buffer() {
     return !::testing::Test::HasFailure();
 }
 
-#endif //ONEMKL_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
+#endif //ONEMATH_COMPUTE_OUT_OF_PLACE_REAL_REAL_HPP
diff --git a/tests/unit_tests/dft/include/compute_tester.hpp b/tests/unit_tests/dft/include/compute_tester.hpp
index 17ffac0cb..96ad1ffa3 100644
--- a/tests/unit_tests/dft/include/compute_tester.hpp
+++ b/tests/unit_tests/dft/include/compute_tester.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_COMPUTE_TESTER_HPP
-#define ONEMKL_COMPUTE_TESTER_HPP
+#ifndef ONEMATH_COMPUTE_TESTER_HPP
+#define ONEMATH_COMPUTE_TESTER_HPP
 
 #include <algorithm>
 
@@ -27,25 +27,25 @@
 #else
 #include <CL/sycl.hpp>
 #endif
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "test_helper.hpp"
 #include "test_common.hpp"
 #include "reference_dft.hpp"
 
 #include <numeric>
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 struct DFT_Test {
-    using descriptor_t = oneapi::mkl::dft::descriptor<precision, domain>;
+    using descriptor_t = oneapi::math::dft::descriptor<precision, domain>;
 
     template <typename ElemT>
     using usm_allocator_t = sycl::usm_allocator<ElemT, sycl::usm::alloc::shared, 64>;
 
     using PrecisionType =
-        typename std::conditional_t<precision == oneapi::mkl::dft::precision::SINGLE, float,
+        typename std::conditional_t<precision == oneapi::math::dft::precision::SINGLE, float,
                                     double>;
 
-    using FwdInputType = typename std::conditional_t<domain == oneapi::mkl::dft::domain::REAL,
+    using FwdInputType = typename std::conditional_t<domain == oneapi::math::dft::domain::REAL,
                                                      PrecisionType, std::complex<PrecisionType>>;
     using FwdOutputType = std::complex<PrecisionType>;
 
@@ -93,7 +93,7 @@ struct DFT_Test {
         out_host_ref = std::vector<FwdOutputType>(size_total);
 
         rand_vector(input, size_total);
-        if constexpr (domain == oneapi::mkl::dft::domain::REAL) {
+        if constexpr (domain == oneapi::math::dft::domain::REAL) {
             for (std::size_t i = 0; i < input.size(); ++i) {
                 input_re[i] = { input[i] };
                 input_im[i] = 0;
@@ -108,7 +108,7 @@ struct DFT_Test {
     }
 
     bool skip_test(MemoryAccessModel mem_acc) {
-        if constexpr (precision == oneapi::mkl::dft::precision::DOUBLE) {
+        if constexpr (precision == oneapi::math::dft::precision::DOUBLE) {
             if (!sycl_queue.get_device().has(sycl::aspect::fp64)) {
                 std::cout << "Device does not support double precision." << std::endl;
                 return true;
@@ -151,4 +151,4 @@ struct DFT_Test {
     int test_out_of_place_real_real_USM();
 };
 
-#endif //ONEMKL_COMPUTE_TESTER_HPP
+#endif //ONEMATH_COMPUTE_TESTER_HPP
diff --git a/tests/unit_tests/dft/include/parseval_check.hpp b/tests/unit_tests/dft/include/parseval_check.hpp
index ece6f7d31..8b8f4dfc5 100644
--- a/tests/unit_tests/dft/include/parseval_check.hpp
+++ b/tests/unit_tests/dft/include/parseval_check.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_PARSEVAL_CHECK_HPP
-#define ONEMKL_PARSEVAL_CHECK_HPP
+#ifndef ONEMATH_PARSEVAL_CHECK_HPP
+#define ONEMATH_PARSEVAL_CHECK_HPP
 
 #include <algorithm>
 #include <cmath>
@@ -78,4 +78,4 @@ bool parseval_check(std::size_t dft_len, const TypeFwd* in, TypeBwd* out,
     }
     return true;
 }
-#endif // ONEMKL_PARSEVAL_CHECK_HPP
+#endif // ONEMATH_PARSEVAL_CHECK_HPP
diff --git a/tests/unit_tests/dft/include/reference_dft.hpp b/tests/unit_tests/dft/include/reference_dft.hpp
index 7114306c6..5c2abcc21 100644
--- a/tests/unit_tests/dft/include/reference_dft.hpp
+++ b/tests/unit_tests/dft/include/reference_dft.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_REFERENCE_DFT_HPP
-#define ONEMKL_REFERENCE_DFT_HPP
+#ifndef ONEMATH_REFERENCE_DFT_HPP
+#define ONEMATH_REFERENCE_DFT_HPP
 
 #include <algorithm>
 #include <cmath>
@@ -26,7 +26,7 @@
 #include <vector>
 #include <numeric>
 
-#include <oneapi/mkl/exceptions.hpp>
+#include <oneapi/math/exceptions.hpp>
 #include "test_common.hpp"
 
 namespace detail {
@@ -121,10 +121,10 @@ void reference_forward_dft(const std::vector<std::int64_t>& sizes, const TypeIn*
         case 2: detail::reference<TypeIn, TypeOut, 2>::forward_dft(unsigned_sizes, in, out); break;
         case 3: detail::reference<TypeIn, TypeOut, 3>::forward_dft(unsigned_sizes, in, out); break;
         default:
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "reference_dft", "forward_dft",
                 "dft with size " + std::to_string(unsigned_sizes.size()));
     }
 }
 
-#endif //ONEMKL_REFERENCE_DFT_HPP
+#endif //ONEMATH_REFERENCE_DFT_HPP
diff --git a/tests/unit_tests/dft/include/test_common.hpp b/tests/unit_tests/dft/include/test_common.hpp
index 4410bdeb2..5b1647e94 100644
--- a/tests/unit_tests/dft/include/test_common.hpp
+++ b/tests/unit_tests/dft/include/test_common.hpp
@@ -17,8 +17,8 @@
 *
 **************************************************************************/
 
-#ifndef ONEMKL_TEST_COMMON_HPP
-#define ONEMKL_TEST_COMMON_HPP
+#ifndef ONEMATH_TEST_COMMON_HPP
+#define ONEMATH_TEST_COMMON_HPP
 
 #include <cstdint>
 #include <cstdlib>
@@ -152,8 +152,8 @@ auto exception_handler = [](sycl::exception_list exceptions) {
     }
 };
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
-void commit_descriptor(oneapi::mkl::dft::descriptor<precision, domain>& descriptor,
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
+void commit_descriptor(oneapi::math::dft::descriptor<precision, domain>& descriptor,
                        sycl::queue queue) {
 #ifdef CALL_RT_API
     descriptor.commit(queue);
@@ -170,7 +170,7 @@ inline std::array<std::int64_t, 4> get_conjugate_even_complex_strides(
         case 2: return { 0, sizes[1] / 2 + 1, 1 };
         case 3: return { 0, sizes[1] * (sizes[2] / 2 + 1), (sizes[2] / 2 + 1), 1 };
         default:
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "dft/test_common", __FUNCTION__,
                 "not implemented for " + std::to_string(sizes.size()) + " dimensions");
             return {};
@@ -180,7 +180,7 @@ inline std::array<std::int64_t, 4> get_conjugate_even_complex_strides(
 // is it assumed that the unused elements of the array are ignored
 inline std::array<std::int64_t, 4> get_default_strides(const std::vector<std::int64_t>& sizes) {
     if (sizes.size() > 3) {
-        throw oneapi::mkl::unimplemented(
+        throw oneapi::math::unimplemented(
             "dft/test_common", __FUNCTION__,
             "not implemented for " + std::to_string(sizes.size()) + " dimensions");
     }
@@ -190,7 +190,7 @@ inline std::array<std::int64_t, 4> get_default_strides(const std::vector<std::in
         case 2: return { 0, sizes[1], 1 };
         case 3: return { 0, sizes[1] * sizes[2], sizes[2], 1 };
         default:
-            throw oneapi::mkl::unimplemented(
+            throw oneapi::math::unimplemented(
                 "dft/test_common", __FUNCTION__,
                 "not implemented for " + std::to_string(sizes.size()) + " dimensions");
             return {};
@@ -205,7 +205,7 @@ T get_default(const std::vector<T> vec, std::size_t idx, T default_) {
     return vec[idx];
 }
 
-template <oneapi::mkl::dft::domain domain, bool in_place = false>
+template <oneapi::math::dft::domain domain, bool in_place = false>
 std::pair<std::int64_t, std::int64_t> get_default_distances(
     const std::vector<std::int64_t>& sizes, const std::vector<std::int64_t>& strides_fwd,
     const std::vector<std::int64_t>& strides_bwd) {
@@ -213,11 +213,11 @@ std::pair<std::int64_t, std::int64_t> get_default_distances(
     std::int64_t size1 = get_default(sizes, 1, 1l);
     std::int64_t size2 = get_default(sizes, 2, 1l);
     std::int64_t size0_real =
-        domain == oneapi::mkl::dft::domain::REAL && sizes.size() == 1 ? size0 / 2 + 1 : size0;
+        domain == oneapi::math::dft::domain::REAL && sizes.size() == 1 ? size0 / 2 + 1 : size0;
     std::int64_t size1_real =
-        domain == oneapi::mkl::dft::domain::REAL && sizes.size() == 2 ? size1 / 2 + 1 : size1;
+        domain == oneapi::math::dft::domain::REAL && sizes.size() == 2 ? size1 / 2 + 1 : size1;
     std::int64_t size2_real =
-        domain == oneapi::mkl::dft::domain::REAL && sizes.size() == 3 ? size2 / 2 + 1 : size2;
+        domain == oneapi::math::dft::domain::REAL && sizes.size() == 3 ? size2 / 2 + 1 : size2;
     std::int64_t backward_distance = size0_real * size1_real * size2_real;
     std::int64_t forward_distance = size0 * size1 * size2;
     if (strides_fwd.size() > 1) {
@@ -233,7 +233,7 @@ std::pair<std::int64_t, std::int64_t> get_default_distances(
     if (in_place) {
         forward_distance =
             std::max(forward_distance,
-                     backward_distance * (domain == oneapi::mkl::dft::domain::REAL ? 2L : 1L));
+                     backward_distance * (domain == oneapi::math::dft::domain::REAL ? 2L : 1L));
     }
     return { forward_distance, backward_distance };
 }
@@ -387,4 +387,4 @@ class DFTParamsPrint {
     }
 };
 
-#endif //ONEMKL_TEST_COMMON_HPP
+#endif //ONEMATH_TEST_COMMON_HPP
diff --git a/tests/unit_tests/dft/source/CMakeLists.txt b/tests/unit_tests/dft/source/CMakeLists.txt
index 364ad564f..1ff996afb 100644
--- a/tests/unit_tests/dft/source/CMakeLists.txt
+++ b/tests/unit_tests/dft/source/CMakeLists.txt
@@ -34,9 +34,9 @@ if (BUILD_SHARED_LIBS)
     if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
         add_sycl_to_target(TARGET dft_source_rt SOURCES ${DFT_SOURCES})
     else ()
-        target_link_libraries(dft_source_rt PUBLIC ONEMKL::SYCL::SYCL)
+        target_link_libraries(dft_source_rt PUBLIC ONEMATH::SYCL::SYCL)
     endif ()
-    target_link_libraries(dft_source_rt PRIVATE onemkl_warnings)
+    target_link_libraries(dft_source_rt PRIVATE onemath_warnings)
 endif ()
 
 add_library(dft_source_ct OBJECT ${DFT_SOURCES})
@@ -51,7 +51,7 @@ target_include_directories(dft_source_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET dft_source_ct SOURCES ${DFT_SOURCES})
 else ()
-    target_link_libraries(dft_source_ct PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(dft_source_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif ()
-target_link_libraries(dft_source_ct PRIVATE onemkl_warnings)
+target_link_libraries(dft_source_ct PRIVATE onemath_warnings)
 
diff --git a/tests/unit_tests/dft/source/compute_tests.cpp b/tests/unit_tests/dft/source/compute_tests.cpp
index eb1d88e7a..349ba2bd7 100644
--- a/tests/unit_tests/dft/source/compute_tests.cpp
+++ b/tests/unit_tests/dft/source/compute_tests.cpp
@@ -61,27 +61,18 @@ class ComputeTests_real_real_out_of_place_REAL
     TEST_P(ComputeTests##_##LAYOUT##PLACE##_##DOMAIN,                                     \
            DOMAIN##_##PRECISION##_##PLACE##_##LAYOUT##STORAGE) {                          \
         try {                                                                             \
-            auto test = DFT_Test<oneapi::mkl::dft::precision::PRECISION,                  \
-                                 oneapi::mkl::dft::domain::DOMAIN>{                       \
+            auto test = DFT_Test<oneapi::math::dft::precision::PRECISION,                 \
+                                 oneapi::math::dft::domain::DOMAIN>{                      \
                 std::get<0>(GetParam()), std::get<1>(GetParam()).sizes,                   \
                 std::get<1>(GetParam()).strides_fwd, std::get<1>(GetParam()).strides_bwd, \
                 std::get<1>(GetParam()).batches                                           \
             };                                                                            \
             EXPECT_TRUEORSKIP(test.test_##PLACE##_##LAYOUT##STORAGE());                   \
         }                                                                                 \
-        catch (oneapi::mkl::unimplemented & e) {                                          \
+        catch (oneapi::math::unimplemented & e) {                                         \
             std::cout << "Skipping test because: \"" << e.what() << "\"" << std::endl;    \
             GTEST_SKIP();                                                                 \
         }                                                                                 \
-        catch (std::exception & e) {                                                      \
-            std::string msg = e.what();                                                   \
-            if ((msg.find("FFT_UNIMPLEMENTED") != std::string::npos) ||                   \
-                (msg.find("Unimplemented") != std::string::npos)) {                       \
-                std::cout << "Skipping test because: \"" << msg << "\"" << std::endl;     \
-                GTEST_SKIP();                                                             \
-            }                                                                             \
-            throw;                                                                        \
-        }                                                                                 \
     }
 
 #define INSTANTIATE_TEST_DIMENSIONS_PRECISION_DOMAIN(PLACE, LAYOUT, STORAGE) \
diff --git a/tests/unit_tests/dft/source/descriptor_tests.cpp b/tests/unit_tests/dft/source/descriptor_tests.cpp
index 155160149..a4290e553 100644
--- a/tests/unit_tests/dft/source/descriptor_tests.cpp
+++ b/tests/unit_tests/dft/source/descriptor_tests.cpp
@@ -41,33 +41,33 @@ namespace {
 constexpr std::int64_t default_1d_lengths = 4;
 const std::vector<std::int64_t> default_3d_lengths{ 124, 5, 3 };
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void set_and_get_lengths() {
     /* Negative Testing */
     {
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
-        EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::LENGTHS, nullptr),
-                     oneapi::mkl::invalid_argument);
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
+        EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::LENGTHS, nullptr),
+                     oneapi::math::invalid_argument);
     }
 
     /* 1D */
     {
         const std::int64_t dimensions = 1;
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
 
         const std::int64_t new_lengths{ 2345 };
         std::int64_t lengths_value{ 0 };
         std::int64_t dimensions_before_set{ 0 };
         std::int64_t dimensions_after_set{ 0 };
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::LENGTHS, &lengths_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimensions_before_set);
+        descriptor.get_value(oneapi::math::dft::config_param::LENGTHS, &lengths_value);
+        descriptor.get_value(oneapi::math::dft::config_param::DIMENSION, &dimensions_before_set);
         EXPECT_EQ(default_1d_lengths, lengths_value);
         EXPECT_EQ(dimensions, dimensions_before_set);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::LENGTHS, new_lengths);
-        descriptor.get_value(oneapi::mkl::dft::config_param::LENGTHS, &lengths_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimensions_after_set);
+        descriptor.set_value(oneapi::math::dft::config_param::LENGTHS, new_lengths);
+        descriptor.get_value(oneapi::math::dft::config_param::LENGTHS, &lengths_value);
+        descriptor.get_value(oneapi::math::dft::config_param::DIMENSION, &dimensions_after_set);
         EXPECT_EQ(new_lengths, lengths_value);
         EXPECT_EQ(dimensions, dimensions_after_set);
     }
@@ -76,22 +76,22 @@ static void set_and_get_lengths() {
     {
         const std::int64_t dimensions = 3;
 
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
 
         std::vector<std::int64_t> lengths_value(3);
         std::vector<std::int64_t> new_lengths{ 1, 2, 7 };
         std::int64_t dimensions_before_set{ 0 };
         std::int64_t dimensions_after_set{ 0 };
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::LENGTHS, lengths_value.data());
-        descriptor.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimensions_before_set);
+        descriptor.get_value(oneapi::math::dft::config_param::LENGTHS, lengths_value.data());
+        descriptor.get_value(oneapi::math::dft::config_param::DIMENSION, &dimensions_before_set);
 
         EXPECT_EQ(default_3d_lengths, lengths_value);
         EXPECT_EQ(dimensions, dimensions_before_set);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::LENGTHS, new_lengths.data());
-        descriptor.get_value(oneapi::mkl::dft::config_param::LENGTHS, lengths_value.data());
-        descriptor.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimensions_after_set);
+        descriptor.set_value(oneapi::math::dft::config_param::LENGTHS, new_lengths.data());
+        descriptor.get_value(oneapi::math::dft::config_param::LENGTHS, lengths_value.data());
+        descriptor.get_value(oneapi::math::dft::config_param::DIMENSION, &dimensions_after_set);
 
         EXPECT_EQ(new_lengths, lengths_value);
         EXPECT_EQ(dimensions, dimensions_after_set);
@@ -101,14 +101,14 @@ static void set_and_get_lengths() {
 // Test for deprecated functionality
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void set_and_get_io_strides() {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
 
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, nullptr),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, nullptr),
-                 oneapi::mkl::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::INPUT_STRIDES, nullptr),
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::OUTPUT_STRIDES, nullptr),
+                 oneapi::math::invalid_argument);
 
     constexpr std::int64_t strides_size = 4;
     const std::int64_t default_stride_d1 = default_3d_lengths[2] * default_3d_lengths[1];
@@ -120,7 +120,7 @@ static void set_and_get_io_strides() {
 
     std::vector<std::int64_t> input_strides_value;
     std::vector<std::int64_t> output_strides_value;
-    if constexpr (domain == oneapi::mkl::dft::domain::COMPLEX) {
+    if constexpr (domain == oneapi::math::dft::domain::COMPLEX) {
         input_strides_value = { 50, default_stride_d1 * 2, default_stride_d2 * 2,
                                 default_stride_d3 * 2 };
         output_strides_value = { 50, default_stride_d1 * 2, default_stride_d2 * 2,
@@ -138,38 +138,41 @@ static void set_and_get_io_strides() {
     std::vector<std::int64_t> fwd_strides_after_set(strides_size, -1);
     std::vector<std::int64_t> bwd_strides_after_set(strides_size, -1);
 
-    descriptor.get_value(oneapi::mkl::dft::config_param::INPUT_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES,
                          input_strides_before_set.data());
     EXPECT_EQ(std::vector<std::int64_t>(strides_size, 0), input_strides_before_set);
-    descriptor.set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, input_strides_value.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::INPUT_STRIDES,
+    descriptor.set_value(oneapi::math::dft::config_param::INPUT_STRIDES,
+                         input_strides_value.data());
+    descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES,
                          input_strides_after_set.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::FWD_STRIDES, fwd_strides_after_set.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::BWD_STRIDES, bwd_strides_after_set.data());
+    descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES,
+                         fwd_strides_after_set.data());
+    descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES,
+                         bwd_strides_after_set.data());
     EXPECT_EQ(input_strides_value, input_strides_after_set);
     EXPECT_EQ(std::vector<std::int64_t>(strides_size, 0), fwd_strides_after_set);
     EXPECT_EQ(std::vector<std::int64_t>(strides_size, 0), bwd_strides_after_set);
 
     std::vector<std::int64_t> output_strides_before_set(strides_size);
     std::vector<std::int64_t> output_strides_after_set(strides_size);
-    descriptor.get_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::OUTPUT_STRIDES,
                          output_strides_before_set.data());
     EXPECT_EQ(std::vector<std::int64_t>(strides_size, 0), output_strides_before_set);
-    descriptor.set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES,
+    descriptor.set_value(oneapi::math::dft::config_param::OUTPUT_STRIDES,
                          output_strides_value.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::OUTPUT_STRIDES,
                          output_strides_after_set.data());
     EXPECT_EQ(output_strides_value, output_strides_after_set);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void set_and_get_fwd_bwd_strides() {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_3d_lengths };
 
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, nullptr),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, nullptr),
-                 oneapi::mkl::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES, nullptr),
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES, nullptr),
+                 oneapi::math::invalid_argument);
 
     constexpr std::int64_t strides_size = 4;
     const std::int64_t default_stride_d1 = default_3d_lengths[2] * default_3d_lengths[1];
@@ -178,7 +181,7 @@ static void set_and_get_fwd_bwd_strides() {
 
     std::vector<std::int64_t> fwd_strides_default_value;
     std::vector<std::int64_t> bwd_strides_default_value;
-    if constexpr (domain == oneapi::mkl::dft::domain::COMPLEX) {
+    if constexpr (domain == oneapi::math::dft::domain::COMPLEX) {
         fwd_strides_default_value = { 0, default_stride_d1, default_stride_d2, default_stride_d3 };
         bwd_strides_default_value = { 0, default_stride_d1, default_stride_d2, default_stride_d3 };
     }
@@ -203,14 +206,16 @@ static void set_and_get_fwd_bwd_strides() {
     std::vector<std::int64_t> input_strides_after_set(strides_size, -1);
     std::vector<std::int64_t> output_strides_after_set(strides_size, -1);
 
-    descriptor.get_value(oneapi::mkl::dft::config_param::FWD_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES,
                          fwd_strides_before_set.data());
     EXPECT_EQ(fwd_strides_default_value, fwd_strides_before_set);
-    descriptor.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, fwd_strides_new_value.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::FWD_STRIDES, fwd_strides_after_set.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::INPUT_STRIDES,
+    descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES,
+                         fwd_strides_new_value.data());
+    descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES,
+                         fwd_strides_after_set.data());
+    descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES,
                          input_strides_after_set.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::OUTPUT_STRIDES,
                          output_strides_after_set.data());
     EXPECT_EQ(fwd_strides_new_value, fwd_strides_after_set);
     EXPECT_EQ(std::vector<std::int64_t>(strides_size, 0), input_strides_after_set);
@@ -218,21 +223,23 @@ static void set_and_get_fwd_bwd_strides() {
 
     std::vector<std::int64_t> bwd_strides_before_set(strides_size);
     std::vector<std::int64_t> bwd_strides_after_set(strides_size);
-    descriptor.get_value(oneapi::mkl::dft::config_param::BWD_STRIDES,
+    descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES,
                          bwd_strides_before_set.data());
     EXPECT_EQ(bwd_strides_default_value, bwd_strides_before_set);
-    descriptor.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, bwd_strides_new_value.data());
-    descriptor.get_value(oneapi::mkl::dft::config_param::BWD_STRIDES, bwd_strides_after_set.data());
+    descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES,
+                         bwd_strides_new_value.data());
+    descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES,
+                         bwd_strides_after_set.data());
     EXPECT_EQ(bwd_strides_new_value, bwd_strides_after_set);
 }
 #pragma clang diagnostic pop
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void set_and_get_values() {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
 
     using Precision_Type =
-        typename std::conditional_t<precision == oneapi::mkl::dft::precision::SINGLE, float,
+        typename std::conditional_t<precision == oneapi::math::dft::precision::SINGLE, float,
                                     double>;
 
     {
@@ -240,12 +247,12 @@ static void set_and_get_values() {
         Precision_Type forward_scale_before_set;
         Precision_Type forward_scale_after_set;
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::FORWARD_SCALE,
+        descriptor.get_value(oneapi::math::dft::config_param::FORWARD_SCALE,
                              &forward_scale_before_set);
         EXPECT_EQ(1.0, forward_scale_before_set);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FORWARD_SCALE,
+        descriptor.set_value(oneapi::math::dft::config_param::FORWARD_SCALE,
                              forward_scale_set_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::FORWARD_SCALE,
+        descriptor.get_value(oneapi::math::dft::config_param::FORWARD_SCALE,
                              &forward_scale_after_set);
         EXPECT_EQ(forward_scale_set_value, forward_scale_after_set);
     }
@@ -255,12 +262,12 @@ static void set_and_get_values() {
         Precision_Type backward_scale_before_set;
         Precision_Type backward_scale_after_set;
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE,
+        descriptor.get_value(oneapi::math::dft::config_param::BACKWARD_SCALE,
                              &backward_scale_before_set);
         EXPECT_EQ(1.0, backward_scale_before_set);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE,
+        descriptor.set_value(oneapi::math::dft::config_param::BACKWARD_SCALE,
                              backward_scale_set_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::BACKWARD_SCALE,
+        descriptor.get_value(oneapi::math::dft::config_param::BACKWARD_SCALE,
                              &backward_scale_after_set);
         EXPECT_EQ(backward_scale_set_value, backward_scale_after_set);
     }
@@ -270,12 +277,12 @@ static void set_and_get_values() {
         std::int64_t n_transforms_before_set;
         std::int64_t n_transforms_after_set;
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
+        descriptor.get_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS,
                              &n_transforms_before_set);
         EXPECT_EQ(1, n_transforms_before_set);
-        descriptor.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
+        descriptor.set_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS,
                              n_transforms_set_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
+        descriptor.get_value(oneapi::math::dft::config_param::NUMBER_OF_TRANSFORMS,
                              &n_transforms_after_set);
         EXPECT_EQ(n_transforms_set_value, n_transforms_after_set);
     }
@@ -285,236 +292,238 @@ static void set_and_get_values() {
         std::int64_t fwd_distance_before_set;
         std::int64_t fwd_distance_after_set;
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::FWD_DISTANCE,
+        descriptor.get_value(oneapi::math::dft::config_param::FWD_DISTANCE,
                              &fwd_distance_before_set);
         EXPECT_EQ(1, fwd_distance_before_set);
-        descriptor.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distance_set_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, &fwd_distance_after_set);
+        descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, fwd_distance_set_value);
+        descriptor.get_value(oneapi::math::dft::config_param::FWD_DISTANCE,
+                             &fwd_distance_after_set);
         EXPECT_EQ(fwd_distance_set_value, fwd_distance_after_set);
 
-        std::int64_t bwd_distance_set_value{ domain == oneapi::mkl::dft::domain::REAL
+        std::int64_t bwd_distance_set_value{ domain == oneapi::math::dft::domain::REAL
                                                  ? (fwd_distance_set_value / 2) + 1
                                                  : fwd_distance_set_value };
         std::int64_t bwd_distance_before_set;
         std::int64_t bwd_distance_after_set;
 
-        descriptor.get_value(oneapi::mkl::dft::config_param::BWD_DISTANCE,
+        descriptor.get_value(oneapi::math::dft::config_param::BWD_DISTANCE,
                              &bwd_distance_before_set);
         EXPECT_EQ(1, bwd_distance_before_set);
-        descriptor.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distance_set_value);
-        descriptor.get_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, &bwd_distance_after_set);
+        descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, bwd_distance_set_value);
+        descriptor.get_value(oneapi::math::dft::config_param::BWD_DISTANCE,
+                             &bwd_distance_after_set);
         EXPECT_EQ(bwd_distance_set_value, bwd_distance_after_set);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::PLACEMENT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::INPLACE, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::NOT_INPLACE);
-        descriptor.get_value(oneapi::mkl::dft::config_param::PLACEMENT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::NOT_INPLACE, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
-                             oneapi::mkl::dft::config_value::INPLACE);
-        descriptor.get_value(oneapi::mkl::dft::config_param::PLACEMENT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::INPLACE, value);
+        descriptor.get_value(oneapi::math::dft::config_param::PLACEMENT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::INPLACE, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::NOT_INPLACE);
+        descriptor.get_value(oneapi::math::dft::config_param::PLACEMENT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::NOT_INPLACE, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::PLACEMENT,
+                             oneapi::math::dft::config_value::INPLACE);
+        descriptor.get_value(oneapi::math::dft::config_param::PLACEMENT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::INPLACE, value);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::COMPLEX_COMPLEX, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
-        descriptor.get_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::REAL_REAL, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
-        descriptor.get_value(oneapi::mkl::dft::config_param::COMPLEX_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::COMPLEX_COMPLEX, value);
+        descriptor.get_value(oneapi::math::dft::config_param::COMPLEX_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::COMPLEX_COMPLEX, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
+        descriptor.get_value(oneapi::math::dft::config_param::COMPLEX_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::REAL_REAL, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::COMPLEX_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.get_value(oneapi::math::dft::config_param::COMPLEX_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::COMPLEX_COMPLEX, value);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::COMPLEX_COMPLEX, value);
+        descriptor.get_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::COMPLEX_COMPLEX, value);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE,
-                             oneapi::mkl::dft::config_value::COMPLEX_COMPLEX);
+        descriptor.set_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE,
+                             oneapi::math::dft::config_value::COMPLEX_COMPLEX);
 
-        value = oneapi::mkl::dft::config_value::COMMITTED; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::COMPLEX_COMPLEX, value);
+        value = oneapi::math::dft::config_value::COMMITTED; // Initialize with invalid value
+        descriptor.get_value(oneapi::math::dft::config_param::CONJUGATE_EVEN_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::COMPLEX_COMPLEX, value);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::REAL_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::REAL_REAL, value);
+        descriptor.get_value(oneapi::math::dft::config_param::REAL_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::REAL_REAL, value);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::REAL_STORAGE,
-                             oneapi::mkl::dft::config_value::REAL_REAL);
+        descriptor.set_value(oneapi::math::dft::config_param::REAL_STORAGE,
+                             oneapi::math::dft::config_value::REAL_REAL);
 
-        value = oneapi::mkl::dft::config_value::COMMITTED; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::REAL_STORAGE, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::REAL_REAL, value);
+        value = oneapi::math::dft::config_value::COMMITTED; // Initialize with invalid value
+        descriptor.get_value(oneapi::math::dft::config_param::REAL_STORAGE, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::REAL_REAL, value);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::ORDERING, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::ORDERED, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::ORDERING,
-                             oneapi::mkl::dft::config_value::BACKWARD_SCRAMBLED);
-        descriptor.get_value(oneapi::mkl::dft::config_param::ORDERING, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::BACKWARD_SCRAMBLED, value);
-
-        descriptor.set_value(oneapi::mkl::dft::config_param::ORDERING,
-                             oneapi::mkl::dft::config_value::ORDERED);
-        descriptor.get_value(oneapi::mkl::dft::config_param::ORDERING, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::ORDERED, value);
+        descriptor.get_value(oneapi::math::dft::config_param::ORDERING, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::ORDERED, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::ORDERING,
+                             oneapi::math::dft::config_value::BACKWARD_SCRAMBLED);
+        descriptor.get_value(oneapi::math::dft::config_param::ORDERING, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::BACKWARD_SCRAMBLED, value);
+
+        descriptor.set_value(oneapi::math::dft::config_param::ORDERING,
+                             oneapi::math::dft::config_value::ORDERED);
+        descriptor.get_value(oneapi::math::dft::config_param::ORDERING, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::ORDERED, value);
     }
 
     {
         bool value = true;
-        descriptor.get_value(oneapi::mkl::dft::config_param::TRANSPOSE, &value);
+        descriptor.get_value(oneapi::math::dft::config_param::TRANSPOSE, &value);
         EXPECT_EQ(false, value);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::TRANSPOSE, true);
-        descriptor.get_value(oneapi::mkl::dft::config_param::TRANSPOSE, &value);
+        descriptor.set_value(oneapi::math::dft::config_param::TRANSPOSE, true);
+        descriptor.get_value(oneapi::math::dft::config_param::TRANSPOSE, &value);
         EXPECT_EQ(true, value);
         /* Set value to false again because transpose is not implemented and will fail on commit
          * when using the MKLGPU backend */
-        descriptor.set_value(oneapi::mkl::dft::config_param::TRANSPOSE, false);
+        descriptor.set_value(oneapi::math::dft::config_param::TRANSPOSE, false);
     }
 
     {
         /* Only value currently supported for PACKED_FORMAT is the config_value::CCE_FORMAT */
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::PACKED_FORMAT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::CCE_FORMAT, value);
+        descriptor.get_value(oneapi::math::dft::config_param::PACKED_FORMAT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::CCE_FORMAT, value);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::PACKED_FORMAT,
-                             oneapi::mkl::dft::config_value::CCE_FORMAT);
+        descriptor.set_value(oneapi::math::dft::config_param::PACKED_FORMAT,
+                             oneapi::math::dft::config_value::CCE_FORMAT);
 
-        value = oneapi::mkl::dft::config_value::COMMITTED; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::PACKED_FORMAT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::CCE_FORMAT, value);
+        value = oneapi::math::dft::config_value::COMMITTED; // Initialize with invalid value
+        descriptor.get_value(oneapi::math::dft::config_param::PACKED_FORMAT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::CCE_FORMAT, value);
     }
 
     {
-        oneapi::mkl::dft::config_value value{
-            oneapi::mkl::dft::config_value::COMMITTED
+        oneapi::math::dft::config_value value{
+            oneapi::math::dft::config_value::COMMITTED
         }; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::WORKSPACE_PLACEMENT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::WORKSPACE_AUTOMATIC, value);
+        descriptor.get_value(oneapi::math::dft::config_param::WORKSPACE_PLACEMENT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::WORKSPACE_AUTOMATIC, value);
 
-        descriptor.set_value(oneapi::mkl::dft::config_param::WORKSPACE_PLACEMENT,
-                             oneapi::mkl::dft::config_value::WORKSPACE_EXTERNAL);
+        descriptor.set_value(oneapi::math::dft::config_param::WORKSPACE_PLACEMENT,
+                             oneapi::math::dft::config_value::WORKSPACE_EXTERNAL);
 
-        value = oneapi::mkl::dft::config_value::COMMITTED; // Initialize with invalid value
-        descriptor.get_value(oneapi::mkl::dft::config_param::WORKSPACE_PLACEMENT, &value);
-        EXPECT_EQ(oneapi::mkl::dft::config_value::WORKSPACE_EXTERNAL, value);
-        descriptor.set_value(oneapi::mkl::dft::config_param::WORKSPACE_PLACEMENT,
-                             oneapi::mkl::dft::config_value::WORKSPACE_AUTOMATIC);
+        value = oneapi::math::dft::config_value::COMMITTED; // Initialize with invalid value
+        descriptor.get_value(oneapi::math::dft::config_param::WORKSPACE_PLACEMENT, &value);
+        EXPECT_EQ(oneapi::math::dft::config_value::WORKSPACE_EXTERNAL, value);
+        descriptor.set_value(oneapi::math::dft::config_param::WORKSPACE_PLACEMENT,
+                             oneapi::math::dft::config_value::WORKSPACE_AUTOMATIC);
     }
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void get_readonly_values() {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
 
-    oneapi::mkl::dft::domain domain_value;
-    descriptor.get_value(oneapi::mkl::dft::config_param::FORWARD_DOMAIN, &domain_value);
+    oneapi::math::dft::domain domain_value;
+    descriptor.get_value(oneapi::math::dft::config_param::FORWARD_DOMAIN, &domain_value);
     EXPECT_EQ(domain_value, domain);
 
-    oneapi::mkl::dft::precision precision_value;
-    descriptor.get_value(oneapi::mkl::dft::config_param::PRECISION, &precision_value);
+    oneapi::math::dft::precision precision_value;
+    descriptor.get_value(oneapi::math::dft::config_param::PRECISION, &precision_value);
     EXPECT_EQ(precision_value, precision);
 
     std::int64_t dimension_value;
-    descriptor.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimension_value);
+    descriptor.get_value(oneapi::math::dft::config_param::DIMENSION, &dimension_value);
     EXPECT_EQ(dimension_value, 1);
 
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor3D{ default_3d_lengths };
-    descriptor3D.get_value(oneapi::mkl::dft::config_param::DIMENSION, &dimension_value);
+    oneapi::math::dft::descriptor<precision, domain> descriptor3D{ default_3d_lengths };
+    descriptor3D.get_value(oneapi::math::dft::config_param::DIMENSION, &dimension_value);
     EXPECT_EQ(dimension_value, 3);
 
-    oneapi::mkl::dft::config_value commit_status;
-    descriptor.get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status);
-    EXPECT_EQ(commit_status, oneapi::mkl::dft::config_value::UNCOMMITTED);
+    oneapi::math::dft::config_value commit_status;
+    descriptor.get_value(oneapi::math::dft::config_param::COMMIT_STATUS, &commit_status);
+    EXPECT_EQ(commit_status, oneapi::math::dft::config_value::UNCOMMITTED);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void set_readonly_values() {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
-
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::FORWARD_DOMAIN,
-                                      oneapi::mkl::dft::domain::REAL),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::FORWARD_DOMAIN,
-                                      oneapi::mkl::dft::domain::COMPLEX),
-                 oneapi::mkl::invalid_argument);
-
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::PRECISION,
-                                      oneapi::mkl::dft::precision::SINGLE),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::PRECISION,
-                                      oneapi::mkl::dft::precision::DOUBLE),
-                 oneapi::mkl::invalid_argument);
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::FORWARD_DOMAIN,
+                                      oneapi::math::dft::domain::REAL),
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::FORWARD_DOMAIN,
+                                      oneapi::math::dft::domain::COMPLEX),
+                 oneapi::math::invalid_argument);
+
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::PRECISION,
+                                      oneapi::math::dft::precision::SINGLE),
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::PRECISION,
+                                      oneapi::math::dft::precision::DOUBLE),
+                 oneapi::math::invalid_argument);
 
     std::int64_t set_dimension{ 3 };
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::DIMENSION, set_dimension),
-                 oneapi::mkl::invalid_argument);
-
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::COMMIT_STATUS,
-                                      oneapi::mkl::dft::config_value::COMMITTED),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(descriptor.set_value(oneapi::mkl::dft::config_param::COMMIT_STATUS,
-                                      oneapi::mkl::dft::config_value::UNCOMMITTED),
-                 oneapi::mkl::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::DIMENSION, set_dimension),
+                 oneapi::math::invalid_argument);
+
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::COMMIT_STATUS,
+                                      oneapi::math::dft::config_value::COMMITTED),
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(descriptor.set_value(oneapi::math::dft::config_param::COMMIT_STATUS,
+                                      oneapi::math::dft::config_value::UNCOMMITTED),
+                 oneapi::math::invalid_argument);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static void get_commited(sycl::queue& sycl_queue) {
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
     commit_descriptor(descriptor, sycl_queue);
 
-    oneapi::mkl::dft::config_value commit_status;
-    descriptor.get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status);
-    EXPECT_EQ(commit_status, oneapi::mkl::dft::config_value::COMMITTED);
+    oneapi::math::dft::config_value commit_status;
+    descriptor.get_value(oneapi::math::dft::config_param::COMMIT_STATUS, &commit_status);
+    EXPECT_EQ(commit_status, oneapi::math::dft::config_value::COMMITTED);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 inline void recommit_values(sycl::queue& sycl_queue) {
-    using oneapi::mkl::dft::config_param;
-    using oneapi::mkl::dft::config_value;
+    using oneapi::math::dft::config_param;
+    using oneapi::math::dft::config_value;
     using PrecisionType =
-        typename std::conditional_t<precision == oneapi::mkl::dft::precision::SINGLE, float,
+        typename std::conditional_t<precision == oneapi::math::dft::precision::SINGLE, float,
                                     double>;
     using value = std::variant<config_value, std::int64_t, std::int64_t*, bool, PrecisionType>;
 
     // this will hold a param to change and the value to change it to
     using test_params = std::vector<std::pair<config_param, value>>;
 
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
     EXPECT_NO_THROW(commit_descriptor(descriptor, sycl_queue));
 
     std::array<std::int64_t, 2> strides{ 0, 1 };
@@ -551,17 +560,17 @@ inline void recommit_values(sycl::queue& sycl_queue) {
         try {
             commit_descriptor(descriptor, sycl_queue);
         }
-        catch (oneapi::mkl::unimplemented e) {
+        catch (oneapi::math::unimplemented e) {
             std::cout << "unimplemented exception at index " << i << " with error : " << e.what()
                       << "\ncontinuing...\n";
         }
-        catch (oneapi::mkl::exception& e) {
+        catch (oneapi::math::exception& e) {
             FAIL() << "exception at index " << i << " with error : " << e.what();
         }
     }
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 inline void change_queue_causes_wait(sycl::queue& busy_queue) {
     // create a queue with work on it, and then show that work is waited on when the descriptor
     // is committed to a new queue.
@@ -577,7 +586,7 @@ inline void change_queue_causes_wait(sycl::queue& busy_queue) {
     sycl::queue free_queue(busy_queue.get_device(), exception_handler);
 
     // commit the descriptor on the "busy" queue
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
     EXPECT_NO_THROW(commit_descriptor(descriptor, busy_queue));
 
     // add some work to the busy queue
@@ -609,29 +618,29 @@ inline void change_queue_causes_wait(sycl::queue& busy_queue) {
     ASSERT_EQ(after_status, sycl::info::event_command_status::complete);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 inline void swap_out_dead_queue(sycl::queue& sycl_queue) {
     // test that commit still works when the previously committed queue is no longer in scope
     // the queue is not actually dead (due to reference counting)
 
     // commit the descriptor on the "busy" queue
-    oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+    oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
     {
         sycl::queue transient_queue(sycl_queue.get_device(), exception_handler);
         EXPECT_NO_THROW(commit_descriptor(descriptor, transient_queue));
     }
     EXPECT_NO_THROW(commit_descriptor(descriptor, sycl_queue));
 
-    using ftype = typename std::conditional_t<precision == oneapi::mkl::dft::precision::SINGLE,
+    using ftype = typename std::conditional_t<precision == oneapi::math::dft::precision::SINGLE,
                                               float, double>;
-    using forward_type = typename std::conditional_t<domain == oneapi::mkl::dft::domain::REAL,
+    using forward_type = typename std::conditional_t<domain == oneapi::math::dft::domain::REAL,
                                                      ftype, std::complex<ftype>>;
 
     // add two so that real-complex transforms have space for all the conjugate even components
     auto inout = sycl::malloc_device<forward_type>(default_1d_lengths + 2, sycl_queue);
     sycl_queue.wait();
 
-    auto transform_event = oneapi::mkl::dft::compute_forward<decltype(descriptor), forward_type>(
+    auto transform_event = oneapi::math::dft::compute_forward<decltype(descriptor), forward_type>(
         descriptor, inout, std::vector<sycl::event>{});
     sycl_queue.wait();
 
@@ -641,17 +650,17 @@ inline void swap_out_dead_queue(sycl::queue& sycl_queue) {
     sycl::free(inout, sycl_queue);
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static int test_move() {
-    using config_param = oneapi::mkl::dft::config_param;
+    using config_param = oneapi::math::dft::config_param;
     // Use forward distance to test an element copied by value (ie. not on heap)
     std::int64_t fwdDistanceRef(123);
     // Use the DFT dimensions to test heap allocated values.
     {
         // Move constructor
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
         descriptor.set_value(config_param::FWD_DISTANCE, fwdDistanceRef);
-        oneapi::mkl::dft::descriptor<precision, domain> descMoved{ std::move(descriptor) };
+        oneapi::math::dft::descriptor<precision, domain> descMoved{ std::move(descriptor) };
         std::int64_t fwdDistance(0), dftLength(0);
         descMoved.get_value(config_param::FWD_DISTANCE, &fwdDistance);
         EXPECT_EQ(fwdDistance, fwdDistanceRef);
@@ -660,9 +669,9 @@ static int test_move() {
     }
     {
         // Move assignment
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
         descriptor.set_value(config_param::FWD_DISTANCE, fwdDistanceRef);
-        oneapi::mkl::dft::descriptor<precision, domain> descMoved{ default_1d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descMoved{ default_1d_lengths };
         descMoved = std::move(descriptor);
         std::int64_t fwdDistance(0), dftLength(0);
         descMoved.get_value(config_param::FWD_DISTANCE, &fwdDistance);
@@ -674,7 +683,7 @@ static int test_move() {
     return !::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 static int test_getter_setter() {
     set_and_get_lengths<precision, domain>();
     set_and_get_io_strides<precision, domain>();
@@ -686,11 +695,11 @@ static int test_getter_setter() {
     return !::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision precision, oneapi::mkl::dft::domain domain>
+template <oneapi::math::dft::precision precision, oneapi::math::dft::domain domain>
 int test_commit(sycl::device* dev) {
     sycl::queue sycl_queue(*dev, exception_handler);
 
-    if constexpr (precision == oneapi::mkl::dft::precision::DOUBLE) {
+    if constexpr (precision == oneapi::math::dft::precision::DOUBLE) {
         if (!dev->has(sycl::aspect::fp64)) {
             std::cout << "Device does not support double precision." << std::endl;
             return test_skipped;
@@ -699,10 +708,10 @@ int test_commit(sycl::device* dev) {
 
     // test that descriptor is supported
     try {
-        oneapi::mkl::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
+        oneapi::math::dft::descriptor<precision, domain> descriptor{ default_1d_lengths };
         commit_descriptor(descriptor, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented& e) {
+    catch (oneapi::math::unimplemented& e) {
         std::cout << "Skipping because simple commit not supported. Reason: \"" << e.what()
                   << "\"\n";
         return test_skipped;
@@ -717,66 +726,68 @@ int test_commit(sycl::device* dev) {
 }
 
 TEST(DescriptorTests, DescriptorMoveRealSingle) {
-    EXPECT_TRUE((test_move<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL>()));
+    EXPECT_TRUE(
+        (test_move<oneapi::math::dft::precision::SINGLE, oneapi::math::dft::domain::REAL>()));
 }
 
 TEST(DescriptorTests, DescriptorMoveRealDouble) {
-    EXPECT_TRUE((test_move<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::REAL>()));
+    EXPECT_TRUE(
+        (test_move<oneapi::math::dft::precision::DOUBLE, oneapi::math::dft::domain::REAL>()));
 }
 
 TEST(DescriptorTests, DescriptorMoveComplexSingle) {
     EXPECT_TRUE(
-        (test_move<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::COMPLEX>()));
+        (test_move<oneapi::math::dft::precision::SINGLE, oneapi::math::dft::domain::COMPLEX>()));
 }
 
 TEST(DescriptorTests, DescriptorMoveComplexDouble) {
     EXPECT_TRUE(
-        (test_move<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::COMPLEX>()));
+        (test_move<oneapi::math::dft::precision::DOUBLE, oneapi::math::dft::domain::COMPLEX>()));
 }
 
 TEST(DescriptorTests, DescriptorTestsRealSingle) {
-    EXPECT_TRUE((
-        test_getter_setter<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL>()));
+    EXPECT_TRUE((test_getter_setter<oneapi::math::dft::precision::SINGLE,
+                                    oneapi::math::dft::domain::REAL>()));
 }
 
 TEST(DescriptorTests, DescriptorTestsRealDouble) {
-    EXPECT_TRUE((
-        test_getter_setter<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::REAL>()));
+    EXPECT_TRUE((test_getter_setter<oneapi::math::dft::precision::DOUBLE,
+                                    oneapi::math::dft::domain::REAL>()));
 }
 
 TEST(DescriptorTests, DescriptorTestsComplexSingle) {
-    EXPECT_TRUE((test_getter_setter<oneapi::mkl::dft::precision::SINGLE,
-                                    oneapi::mkl::dft::domain::COMPLEX>()));
+    EXPECT_TRUE((test_getter_setter<oneapi::math::dft::precision::SINGLE,
+                                    oneapi::math::dft::domain::COMPLEX>()));
 }
 
 TEST(DescriptorTests, DescriptorTestsComplexDouble) {
-    EXPECT_TRUE((test_getter_setter<oneapi::mkl::dft::precision::DOUBLE,
-                                    oneapi::mkl::dft::domain::COMPLEX>()));
+    EXPECT_TRUE((test_getter_setter<oneapi::math::dft::precision::DOUBLE,
+                                    oneapi::math::dft::domain::COMPLEX>()));
 }
 
 class DescriptorCommitTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(DescriptorCommitTests, DescriptorCommitTestsRealSingle) {
     EXPECT_TRUEORSKIP(
-        (test_commit<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL>(
+        (test_commit<oneapi::math::dft::precision::SINGLE, oneapi::math::dft::domain::REAL>(
             GetParam())));
 }
 
 TEST_P(DescriptorCommitTests, DescriptorCommitTestsRealDouble) {
     EXPECT_TRUEORSKIP(
-        (test_commit<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::REAL>(
+        (test_commit<oneapi::math::dft::precision::DOUBLE, oneapi::math::dft::domain::REAL>(
             GetParam())));
 }
 
 TEST_P(DescriptorCommitTests, DescriptorCommitTestsComplexSingle) {
     EXPECT_TRUEORSKIP(
-        (test_commit<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::COMPLEX>(
+        (test_commit<oneapi::math::dft::precision::SINGLE, oneapi::math::dft::domain::COMPLEX>(
             GetParam())));
 }
 
 TEST_P(DescriptorCommitTests, DescriptorCommitTestsComplexDouble) {
     EXPECT_TRUEORSKIP(
-        (test_commit<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::COMPLEX>(
+        (test_commit<oneapi::math::dft::precision::DOUBLE, oneapi::math::dft::domain::COMPLEX>(
             GetParam())));
 }
 
diff --git a/tests/unit_tests/dft/source/workspace_external_tests.cpp b/tests/unit_tests/dft/source/workspace_external_tests.cpp
index f96544a90..15b21cd65 100644
--- a/tests/unit_tests/dft/source/workspace_external_tests.cpp
+++ b/tests/unit_tests/dft/source/workspace_external_tests.cpp
@@ -35,9 +35,9 @@ extern std::vector<sycl::device*> devices;
 
 class WorkspaceExternalTests : public ::testing::TestWithParam<sycl::device*> {};
 
-template <oneapi::mkl::dft::precision prec, oneapi::mkl::dft::domain dom>
+template <oneapi::math::dft::precision prec, oneapi::math::dft::domain dom>
 int test_workspace_external_usm_impl(std::size_t dft_size, sycl::device* dev) {
-    using namespace oneapi::mkl::dft;
+    using namespace oneapi::math::dft;
     using scalar_t = std::conditional_t<prec == precision::DOUBLE, double, float>;
     using forward_t = std::conditional_t<dom == domain::COMPLEX, std::complex<scalar_t>, scalar_t>;
     using backward_t = std::complex<scalar_t>;
@@ -54,7 +54,7 @@ int test_workspace_external_usm_impl(std::size_t dft_size, sycl::device* dev) {
     try {
         commit_descriptor(desc, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented&) {
+    catch (oneapi::math::unimplemented&) {
         std::cout << "Test configuration not implemented." << std::endl;
         return test_skipped;
     }
@@ -106,9 +106,9 @@ int test_workspace_external_usm_impl(std::size_t dft_size, sycl::device* dev) {
     return sanityCheckPasses ? !::testing::Test::HasFailure() : ::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision prec, oneapi::mkl::dft::domain dom>
+template <oneapi::math::dft::precision prec, oneapi::math::dft::domain dom>
 int test_workspace_external_buffer_impl(std::size_t dft_size, sycl::device* dev) {
-    using namespace oneapi::mkl::dft;
+    using namespace oneapi::math::dft;
     using scalar_t = std::conditional_t<prec == precision::DOUBLE, double, float>;
     using forward_t = std::conditional_t<dom == domain::COMPLEX, std::complex<scalar_t>, scalar_t>;
     using backward_t = std::complex<scalar_t>;
@@ -125,7 +125,7 @@ int test_workspace_external_buffer_impl(std::size_t dft_size, sycl::device* dev)
     try {
         commit_descriptor(desc, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented&) {
+    catch (oneapi::math::unimplemented&) {
         std::cout << "Test configuration not implemented." << std::endl;
         return test_skipped;
     }
@@ -170,49 +170,49 @@ int test_workspace_external_buffer_impl(std::size_t dft_size, sycl::device* dev)
     return sanityCheckPasses ? !::testing::Test::HasFailure() : ::testing::Test::HasFailure();
 }
 
-template <oneapi::mkl::dft::precision prec, oneapi::mkl::dft::domain dom>
+template <oneapi::math::dft::precision prec, oneapi::math::dft::domain dom>
 void test_workspace_external_usm(sycl::device* dev) {
     EXPECT_TRUEORSKIP((test_workspace_external_usm_impl<prec, dom>(2, dev)));
     EXPECT_TRUEORSKIP((test_workspace_external_usm_impl<prec, dom>(1024 * 3 * 5 * 7 * 16, dev)));
 }
 
-template <oneapi::mkl::dft::precision prec, oneapi::mkl::dft::domain dom>
+template <oneapi::math::dft::precision prec, oneapi::math::dft::domain dom>
 void test_workspace_external_buffer(sycl::device* dev) {
     EXPECT_TRUEORSKIP((test_workspace_external_buffer_impl<prec, dom>(2, dev)));
     EXPECT_TRUEORSKIP((test_workspace_external_buffer_impl<prec, dom>(1024 * 3 * 5 * 7 * 16, dev)));
 }
 
 TEST_P(WorkspaceExternalTests, TestWorkspaceExternalSingleUsm) {
-    using precision = oneapi::mkl::dft::precision;
-    using domain = oneapi::mkl::dft::domain;
+    using precision = oneapi::math::dft::precision;
+    using domain = oneapi::math::dft::domain;
     test_workspace_external_usm<precision::SINGLE, domain::REAL>(GetParam());
     test_workspace_external_usm<precision::SINGLE, domain::COMPLEX>(GetParam());
 }
 
 TEST_P(WorkspaceExternalTests, TestWorkspaceExternalDoubleUsm) {
-    using precision = oneapi::mkl::dft::precision;
-    using domain = oneapi::mkl::dft::domain;
+    using precision = oneapi::math::dft::precision;
+    using domain = oneapi::math::dft::domain;
     test_workspace_external_usm<precision::DOUBLE, domain::REAL>(GetParam());
     test_workspace_external_usm<precision::DOUBLE, domain::COMPLEX>(GetParam());
 }
 
 TEST_P(WorkspaceExternalTests, TestWorkspaceExternalSingleBuffer) {
-    using precision = oneapi::mkl::dft::precision;
-    using domain = oneapi::mkl::dft::domain;
+    using precision = oneapi::math::dft::precision;
+    using domain = oneapi::math::dft::domain;
     test_workspace_external_buffer<precision::SINGLE, domain::REAL>(GetParam());
     test_workspace_external_buffer<precision::SINGLE, domain::COMPLEX>(GetParam());
 }
 
 TEST_P(WorkspaceExternalTests, TestWorkspaceExternalDoubleBuffer) {
-    using precision = oneapi::mkl::dft::precision;
-    using domain = oneapi::mkl::dft::domain;
+    using precision = oneapi::math::dft::precision;
+    using domain = oneapi::math::dft::domain;
     test_workspace_external_buffer<precision::DOUBLE, domain::REAL>(GetParam());
     test_workspace_external_buffer<precision::DOUBLE, domain::COMPLEX>(GetParam());
 }
 
 /// A test where set_workspace is called when an external workspace is not set.
 TEST_P(WorkspaceExternalTests, SetWorkspaceOnWorkspaceAutomatic) {
-    using namespace oneapi::mkl::dft;
+    using namespace oneapi::math::dft;
     sycl::queue sycl_queue(*GetParam());
     const int dft_len = 1024 * 3 * 5 * 7 * 16; // A size likely to require an external workspace.
     float* fft_data_usm = sycl::malloc_device<float>(dft_len * 2, sycl_queue);
@@ -223,7 +223,7 @@ TEST_P(WorkspaceExternalTests, SetWorkspaceOnWorkspaceAutomatic) {
         commit_descriptor(desc_usm, sycl_queue);
         commit_descriptor(desc_buf, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented&) {
+    catch (oneapi::math::unimplemented&) {
         // The DFT size may not be supported. Use a size that is likely to be supported, even if
         // that means no external workspace is actually used.
         descriptor<precision::SINGLE, domain::COMPLEX> desc_usm2(2), desc_buf2(2);
@@ -264,10 +264,10 @@ TEST_P(WorkspaceExternalTests, SetWorkspaceOnWorkspaceAutomatic) {
     sycl_queue.wait_and_throw();
 
     // Should not work:
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_forward(desc_buf, fft_data_usm), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_backward(desc_usm, fft_data_buf), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_backward(desc_buf, fft_data_usm), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_buf, fft_data_usm), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_backward(desc_usm, fft_data_buf), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_backward(desc_buf, fft_data_usm), oneapi::math::invalid_argument);
     sycl_queue.wait_and_throw();
 
     // Free any allocations:
@@ -277,7 +277,7 @@ TEST_P(WorkspaceExternalTests, SetWorkspaceOnWorkspaceAutomatic) {
 
 /// Test that the implementation throws as expected.
 TEST_P(WorkspaceExternalTests, ThrowOnBadCalls) {
-    using namespace oneapi::mkl::dft;
+    using namespace oneapi::math::dft;
     sycl::queue sycl_queue(*GetParam());
     const int dft_len = 1024 * 3 * 5 * 7 * 16; // A size likely to require an external workspace.
     float* fft_data_usm = sycl::malloc_device<float>(dft_len * 2, sycl_queue);
@@ -289,13 +289,13 @@ TEST_P(WorkspaceExternalTests, ThrowOnBadCalls) {
     std::int64_t workspace_bytes = -10;
     float* usm_workspace = nullptr;
     EXPECT_THROW(desc_usm.get_value(config_param::WORKSPACE_EXTERNAL_BYTES, &workspace_bytes),
-                 oneapi::mkl::invalid_argument);
-    EXPECT_THROW(desc_usm.set_workspace(usm_workspace), oneapi::mkl::uninitialized);
+                 oneapi::math::invalid_argument);
+    EXPECT_THROW(desc_usm.set_workspace(usm_workspace), oneapi::math::uninitialized);
     try {
         commit_descriptor(desc_usm, sycl_queue);
         commit_descriptor(desc_buf, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented&) {
+    catch (oneapi::math::unimplemented&) {
         // DFT size may not be supported. Use a DFT size that probably will be, even if it
         // won't actually use an external workspace internally.
         descriptor<precision::SINGLE, domain::COMPLEX> desc_usm2(2), desc_buf2(2);
@@ -311,16 +311,16 @@ TEST_P(WorkspaceExternalTests, ThrowOnBadCalls) {
     EXPECT_GE(workspace_bytes, 0);
 
     // We haven't set a workspace, so the following should fail;
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::math::invalid_argument);
     sycl_queue.wait_and_throw();
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::math::invalid_argument);
     sycl_queue.wait_and_throw();
 
     if (workspace_bytes > 0) {
-        EXPECT_THROW(desc_usm.set_workspace(nullptr), oneapi::mkl::invalid_argument);
+        EXPECT_THROW(desc_usm.set_workspace(nullptr), oneapi::math::invalid_argument);
         sycl::buffer<float> undersize_workspace(
             static_cast<std::size_t>(workspace_bytes) / sizeof(float) - 1);
-        EXPECT_THROW(desc_buf.set_workspace(undersize_workspace), oneapi::mkl::invalid_argument);
+        EXPECT_THROW(desc_buf.set_workspace(undersize_workspace), oneapi::math::invalid_argument);
     }
 
     usm_workspace = sycl::malloc_device<float>(
@@ -341,10 +341,10 @@ TEST_P(WorkspaceExternalTests, ThrowOnBadCalls) {
     sycl_queue.wait_and_throw();
 
     // Should not work:
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_forward(desc_buf, fft_data_usm), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_backward(desc_usm, fft_data_buf), oneapi::mkl::invalid_argument);
-    EXPECT_THROW(compute_backward(desc_buf, fft_data_usm), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_buf), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_buf, fft_data_usm), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_backward(desc_usm, fft_data_buf), oneapi::math::invalid_argument);
+    EXPECT_THROW(compute_backward(desc_buf, fft_data_usm), oneapi::math::invalid_argument);
     sycl_queue.wait_and_throw();
 
     // Free any allocations:
@@ -353,7 +353,7 @@ TEST_P(WorkspaceExternalTests, ThrowOnBadCalls) {
 }
 
 TEST_P(WorkspaceExternalTests, RecommitBehaviour) {
-    using namespace oneapi::mkl::dft;
+    using namespace oneapi::math::dft;
     sycl::queue sycl_queue(*GetParam());
     const int dft_len = 1024 * 3 * 5 * 7 * 16; // A size likely to require an external workspace.
     float* fft_data_usm = sycl::malloc_device<float>(dft_len * 2, sycl_queue);
@@ -362,7 +362,7 @@ TEST_P(WorkspaceExternalTests, RecommitBehaviour) {
         // WORKSPACE_EXTERNAL is NOT set.
         commit_descriptor(desc_usm, sycl_queue);
     }
-    catch (oneapi::mkl::unimplemented&) {
+    catch (oneapi::math::unimplemented&) {
         // DFT size may not be supported. Use a DFT size that probably will be, even if it
         // won't actually use an external workspace internally.
         descriptor<precision::SINGLE, domain::COMPLEX> desc_usm2(2);
@@ -382,7 +382,7 @@ TEST_P(WorkspaceExternalTests, RecommitBehaviour) {
     commit_descriptor(desc_usm, sycl_queue);
 
     // No workspace, expect throw
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::math::invalid_argument);
 
     desc_usm.set_workspace(usm_workspace);
 
@@ -391,7 +391,7 @@ TEST_P(WorkspaceExternalTests, RecommitBehaviour) {
 
     // Recommitting should require workspace to be set again.
     commit_descriptor(desc_usm, sycl_queue);
-    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::mkl::invalid_argument);
+    EXPECT_THROW(compute_forward(desc_usm, fft_data_usm), oneapi::math::invalid_argument);
     sycl_queue.wait_and_throw();
 
     // Free any allocations:
diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp
index cb27c9098..5d2996b57 100644
--- a/tests/unit_tests/include/test_helper.hpp
+++ b/tests/unit_tests/include/test_helper.hpp
@@ -30,9 +30,9 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl/detail/backend_selector.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math/detail/backend_selector.hpp"
 
 #ifdef _WIN64
 #include <malloc.h>
@@ -73,112 +73,112 @@
     if (d->get_info<sycl::info::device::double_fp_config>().size() == 0) \
     GTEST_SKIP() << "Double precision is not supported on the device"
 
-#if defined(ONEMKL_ENABLE_MKLCPU_BACKEND) || defined(ONEMKL_ENABLE_NETLIB_BACKEND)
-#ifdef ONEMKL_ENABLE_MKLCPU_BACKEND
+#if defined(ONEMATH_ENABLE_MKLCPU_BACKEND) || defined(ONEMATH_ENABLE_NETLIB_BACKEND)
+#ifdef ONEMATH_ENABLE_MKLCPU_BACKEND
 #define TEST_RUN_INTELCPU_SELECT_NO_ARGS(q, func) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ q })
+    func(oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ q })
 #define TEST_RUN_INTELCPU_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::mklcpu>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_INTELCPU_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::netlib>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::netlib>{ q }, __VA_ARGS__)
 #endif
 #else
 #define TEST_RUN_INTELCPU_SELECT_NO_ARGS(q, func)
 #define TEST_RUN_INTELCPU_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_MKLGPU_BACKEND
+#ifdef ONEMATH_ENABLE_MKLGPU_BACKEND
 #define TEST_RUN_INTELGPU_SELECT_NO_ARGS(q, func) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklgpu>{ q })
+    func(oneapi::math::backend_selector<oneapi::math::backend::mklgpu>{ q })
 #define TEST_RUN_INTELGPU_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklgpu>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::mklgpu>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_INTELGPU_SELECT_NO_ARGS(q, func)
 #define TEST_RUN_INTELGPU_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_CUBLAS_BACKEND
+#ifdef ONEMATH_ENABLE_CUBLAS_BACKEND
 #define TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::cublas>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, ...)
 #endif
-#ifdef ONEMKL_ENABLE_CUSOLVER_BACKEND
+#ifdef ONEMATH_ENABLE_CUSOLVER_BACKEND
 #define TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::cusolver>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCBLAS_BACKEND
+#ifdef ONEMATH_ENABLE_ROCBLAS_BACKEND
 #define TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::rocblas>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::rocblas>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND
 #define TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::curand>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::curand>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCRAND_BACKEND
+#ifdef ONEMATH_ENABLE_ROCRAND_BACKEND
 #define TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::rocrand>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::rocrand>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCSOLVER_BACKEND
+#ifdef ONEMATH_ENABLE_ROCSOLVER_BACKEND
 #define TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::rocsolver>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::rocsolver>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_PORTBLAS_BACKEND
+#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND
 #define TEST_RUN_PORTBLAS_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::portblas>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::portblas>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_PORTBLAS_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_CUFFT_BACKEND
+#ifdef ONEMATH_ENABLE_CUFFT_BACKEND
 #define TEST_RUN_NVIDIAGPU_CUFFT_SELECT_NO_ARGS(q, func) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::cufft>{ q })
+    func(oneapi::math::backend_selector<oneapi::math::backend::cufft>{ q })
 #define TEST_RUN_NVIDIAGPU_CUFFT_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::cufft>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::cufft>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_NVIDIAGPU_CUFFT_SELECT_NO_ARGS(q, func)
 #define TEST_RUN_NVIDIAGPU_CUFFT_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_ROCFFT_BACKEND
+#ifdef ONEMATH_ENABLE_ROCFFT_BACKEND
 #define TEST_RUN_AMDGPU_ROCFFT_SELECT_NO_ARGS(q, func) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::rocfft>{ q })
+    func(oneapi::math::backend_selector<oneapi::math::backend::rocfft>{ q })
 #define TEST_RUN_AMDGPU_ROCFFT_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::rocfft>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::rocfft>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_AMDGPU_ROCFFT_SELECT_NO_ARGS(q, func)
 #define TEST_RUN_AMDGPU_ROCFFT_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_PORTFFT_BACKEND
+#ifdef ONEMATH_ENABLE_PORTFFT_BACKEND
 #define TEST_RUN_PORTFFT_SELECT_NO_ARGS(q, func) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::portfft>{ q })
+    func(oneapi::math::backend_selector<oneapi::math::backend::portfft>{ q })
 #define TEST_RUN_PORTFFT_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::portfft>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::portfft>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_PORTFFT_SELECT_NO_ARGS(q, func)
 #define TEST_RUN_PORTFFT_SELECT(q, func, ...)
 #endif
 
-#ifdef ONEMKL_ENABLE_CUSPARSE_BACKEND
+#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
 #define TEST_RUN_NVIDIAGPU_CUSPARSE_SELECT(q, func, ...) \
-    func(oneapi::mkl::backend_selector<oneapi::mkl::backend::cusparse>{ q }, __VA_ARGS__)
+    func(oneapi::math::backend_selector<oneapi::math::backend::cusparse>{ q }, __VA_ARGS__)
 #else
 #define TEST_RUN_NVIDIAGPU_CUSPARSE_SELECT(q, func, ...)
 #endif
@@ -300,9 +300,10 @@ class DeviceNamePrint {
 class LayoutDeviceNamePrint {
 public:
     std::string operator()(
-        testing::TestParamInfo<std::tuple<sycl::device*, oneapi::mkl::layout>> dev) const {
-        std::string layout_name =
-            std::get<1>(dev.param) == oneapi::mkl::layout::col_major ? "Column_Major" : "Row_Major";
+        testing::TestParamInfo<std::tuple<sycl::device*, oneapi::math::layout>> dev) const {
+        std::string layout_name = std::get<1>(dev.param) == oneapi::math::layout::col_major
+                                      ? "Column_Major"
+                                      : "Row_Major";
         std::string dev_name = std::get<0>(dev.param)->get_info<sycl::info::device::name>();
         for (std::string::size_type i = 0; i < dev_name.size(); ++i) {
             if (!isalnum(dev_name[i]))
@@ -314,9 +315,9 @@ class LayoutDeviceNamePrint {
 };
 
 /* to accommodate Windows and Linux differences between alligned_alloc and
-   _aligned_malloc calls use oneapi::mkl::aligned_alloc and oneapi::mkl::aligned_free instead */
+   _aligned_malloc calls use oneapi::math::aligned_alloc and oneapi::math::aligned_free instead */
 namespace oneapi {
-namespace mkl {
+namespace math {
 
 static inline void* aligned_alloc(size_t align, size_t size) {
 #ifdef _WIN64
@@ -340,10 +341,10 @@ static inline void* malloc_shared(size_t align, size_t size, sycl::device dev, s
 #ifdef _WIN64
     return sycl::malloc_shared(size, dev, ctx);
 #else
-#if defined(ONEMKL_ENABLE_CUBLAS_BACKEND) || defined(ONEMKL_ENABLE_ROCBLAS_BACKEND)
+#if defined(ONEMATH_ENABLE_CUBLAS_BACKEND) || defined(ONEMATH_ENABLE_ROCBLAS_BACKEND)
     return sycl::aligned_alloc_shared(align, size, dev, ctx);
 #endif
-#if !defined(ONEMKL_ENABLE_CUBLAS_BACKEND) && !defined(ONEMKL_ENABLE_ROCBLAS_BACKEND)
+#if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND)
     return sycl::malloc_shared(size, dev, ctx);
 #endif
 #endif
@@ -354,10 +355,10 @@ static inline void* malloc_device(size_t align, size_t size, sycl::device dev, s
 #ifdef _WIN64
     return sycl::malloc_device(size, dev, ctx);
 #else
-#if defined(ONEMKL_ENABLE_CUBLAS_BACKEND) || defined(ONEMKL_ENABLE_ROCBLAS_BACKEND)
+#if defined(ONEMATH_ENABLE_CUBLAS_BACKEND) || defined(ONEMATH_ENABLE_ROCBLAS_BACKEND)
     return sycl::aligned_alloc_device(align, size, dev, ctx);
 #endif
-#if !defined(ONEMKL_ENABLE_CUBLAS_BACKEND) && !defined(ONEMKL_ENABLE_ROCBLAS_BACKEND)
+#if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND)
     return sycl::malloc_device(size, dev, ctx);
 #endif
 #endif
@@ -371,7 +372,7 @@ static inline void free_usm(void* p, sycl::context ctx) {
     sycl::free(p, ctx);
 }
 
-} // namespace mkl
+} // namespace math
 } // namespace oneapi
 
 #endif // _TEST_HELPER_HPP_
diff --git a/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp b/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp
index 757541b11..c379e5d25 100644
--- a/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp
+++ b/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp
@@ -127,9 +127,9 @@ bool check_geqrf_accuracy(int64_t m, int64_t n, const std::vector<fp>& A, int64_
     /* |A - Q R| < |A| O(eps) */
     std::vector<fp> R(m * n);
     int64_t ldr = m;
-    reference::laset(oneapi::mkl::uplo::lower, m, n, 0.0, 0.0, R.data(), ldr);
-    reference::lacpy(oneapi::mkl::uplo::upper, m, n, A.data(), lda, R.data(), ldr);
-    auto info = reference::or_un_mqr(oneapi::mkl::side::left, oneapi::mkl::transpose::nontrans, m,
+    reference::laset(oneapi::math::uplo::lower, m, n, 0.0, 0.0, R.data(), ldr);
+    reference::lacpy(oneapi::math::uplo::upper, m, n, A.data(), lda, R.data(), ldr);
+    auto info = reference::or_un_mqr(oneapi::math::side::left, oneapi::math::transpose::nontrans, m,
                                      n, std::min(m, n), A.data(), lda, tau.data(), R.data(), ldr);
     if (0 != info) {
         test_log::lout << "reference ormqr/unmqr failed with info = " << info << std::endl;
@@ -153,7 +153,7 @@ bool check_geqrf_accuracy(int64_t m, int64_t n, const std::vector<fp>& A, int64_
     }
     std::vector<fp> QQ(m * m);
     int64_t ldqq = m;
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, m, m, m,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, m, m, m,
                     1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), ldqq);
     if (!rel_id_err_check(m, QQ, ldqq)) {
         test_log::lout << "Orthogonality check failed" << std::endl;
@@ -173,7 +173,7 @@ bool check_gerqf_accuracy(const std::vector<fp>& A, const std::vector<fp>& A_ini
         std::vector<fp> R(m * n);
         int64_t ldr = m;
         reference::lacpy('A', m, n, A.data(), lda, R.data(), ldr);
-        reference::laset(oneapi::mkl::uplo::lower, n - 1, n - 1, 0.0, 0.0,
+        reference::laset(oneapi::math::uplo::lower, n - 1, n - 1, 0.0, 0.0,
                          R.data() + ((m - n + 1) + 0 * ldr), ldr);
 
         std::vector<fp> Q(lda * n);
@@ -181,7 +181,7 @@ bool check_gerqf_accuracy(const std::vector<fp>& A, const std::vector<fp>& A_ini
         reference::lacpy('A', n, n, A.data() + ((m - n) + 0 * lda), lda, Q.data(), ldq);
 
         auto info =
-            reference::or_un_mrq(oneapi::mkl::side::right, oneapi::mkl::transpose::nontrans, m, n,
+            reference::or_un_mrq(oneapi::math::side::right, oneapi::math::transpose::nontrans, m, n,
                                  std::min(m, n), Q.data(), ldq, tau.data(), R.data(), ldr);
         if (0 != info) {
             test_log::lout << "reference ormqr/unmqr failed with info = " << info << std::endl;
@@ -195,8 +195,8 @@ bool check_gerqf_accuracy(const std::vector<fp>& A, const std::vector<fp>& A_ini
     else {
         std::vector<fp> R(m * n);
         int64_t ldr = m;
-        reference::laset(oneapi::mkl::uplo::lower, m, m, 0.0, 0.0, R.data(), ldr);
-        reference::lacpy(oneapi::mkl::uplo::upper, m, m, A.data() + (0 + (n - m) * lda), lda,
+        reference::laset(oneapi::math::uplo::lower, m, m, 0.0, 0.0, R.data(), ldr);
+        reference::lacpy(oneapi::math::uplo::upper, m, m, A.data() + (0 + (n - m) * lda), lda,
                          R.data() + (0 + (n - m) * ldr), ldr);
 
         std::vector<fp> Q(n * n);
@@ -206,8 +206,9 @@ bool check_gerqf_accuracy(const std::vector<fp>& A, const std::vector<fp>& A_ini
         std::vector<fp> tau2(n);
         for (int64_t i = 0; i < std::min(m, n); i++)
             tau2[n - m + i] = tau[i];
-        auto info = reference::or_un_mrq(oneapi::mkl::side::right, oneapi::mkl::transpose::nontrans,
-                                         m, n, n, Q.data(), ldq, tau2.data(), R.data(), ldr);
+        auto info =
+            reference::or_un_mrq(oneapi::math::side::right, oneapi::math::transpose::nontrans, m, n,
+                                 n, Q.data(), ldq, tau2.data(), R.data(), ldr);
         if (0 != info) {
             test_log::lout << "reference ormqr/unmqr failed with info = " << info << std::endl;
             return false;
@@ -233,7 +234,7 @@ bool check_gerqf_accuracy(const std::vector<fp>& A, const std::vector<fp>& A_ini
 
     std::vector<fp> QQ(std::min(m, n) * std::min(m, n));
     int64_t ldqq = std::min(m, n);
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans,
                     std::min(m, n), std::min(m, n), n, 1.0, Q.data(), ldq, Q.data(), ldq, 0.0,
                     QQ.data(), ldqq);
 
@@ -255,17 +256,17 @@ bool check_getrf_accuracy(int64_t m, int64_t n, const std::vector<fp>& A, int64_
     /* Compute P L U */
     reference::laset('A', m, n, 0.0, 0.0, residual.data(), m);
     if (m < n) {
-        reference::lacpy(oneapi::mkl::uplo::upper, m, n, A.data(), lda, residual.data(), m);
-        reference::trmm(oneapi::mkl::side::left, oneapi::mkl::uplo::lower,
-                        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, m, n, 1.0,
+        reference::lacpy(oneapi::math::uplo::upper, m, n, A.data(), lda, residual.data(), m);
+        reference::trmm(oneapi::math::side::left, oneapi::math::uplo::lower,
+                        oneapi::math::transpose::nontrans, oneapi::math::diag::unit, m, n, 1.0,
                         A.data(), lda, residual.data(), m);
     }
     else {
-        reference::lacpy(oneapi::mkl::uplo::lower, m, n, A.data(), lda, residual.data(), m);
+        reference::lacpy(oneapi::math::uplo::lower, m, n, A.data(), lda, residual.data(), m);
         for (int64_t diag = 0; diag < n; diag++)
             residual[diag + diag * m] = 1.0;
-        reference::trmm(oneapi::mkl::side::right, oneapi::mkl::uplo::upper,
-                        oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::nonunit, m, n, 1.0,
+        reference::trmm(oneapi::math::side::right, oneapi::math::uplo::upper,
+                        oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, m, n, 1.0,
                         A.data(), lda, residual.data(), m);
     }
     reference::laswp(n, residual.data(), m, 1, std::min(m, n), ipiv.data(), -1);
@@ -314,7 +315,7 @@ bool check_getri_accuracy(int64_t n, std::vector<fp> A, int64_t lda, std::vector
 
     /* Compute | I - inv(A)*A |. Store in residual array */
     reference::laset('A', n, n, 0.0, 1.0, residual.data(), n);
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, n, n,
                     -1.0, A.data(), lda, A_initial.data(), lda, 1.0, residual.data(), n);
 
     /* | I - inv(A)*A | / ( |A| * |inv(A)| * n * ulp ) */
@@ -331,7 +332,7 @@ bool check_getri_accuracy(int64_t n, std::vector<fp> A, int64_t lda, std::vector
 
     /* Compute | I - A*inv(A) |. Store in residual */
     reference::laset('A', n, n, 0.0, 1.0, residual.data(), n);
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, n, n,
                     -1.0, A_initial.data(), lda, A.data(), lda, 1.0, residual.data(), n);
 
     /* | I - A*inv(A) | / ( |A| * |inv(A)| * n * ulp ) */
@@ -350,13 +351,13 @@ bool check_getri_accuracy(int64_t n, std::vector<fp> A, int64_t lda, std::vector
 }
 
 template <typename fp>
-bool check_getrs_accuracy(oneapi::mkl::transpose transa, int64_t n, int64_t nrhs,
+bool check_getrs_accuracy(oneapi::math::transpose transa, int64_t n, int64_t nrhs,
                           const std::vector<fp>& B, int64_t ldb, const std::vector<fp>& A_initial,
                           int64_t lda, std::vector<fp> B_initial) {
     using fp_real = typename complex_info<fp>::real_type;
 
     // Compute A*X - B. Store result in B_initial
-    reference::gemm(transa, oneapi::mkl::transpose::nontrans, n, nrhs, n, -1.0, A_initial.data(),
+    reference::gemm(transa, oneapi::math::transpose::nontrans, n, nrhs, n, -1.0, A_initial.data(),
                     lda, B.data(), ldb, 1.0, B_initial.data(), ldb);
 
     // Compute norm residual |A*X - B|
@@ -384,25 +385,26 @@ bool check_getrs_accuracy(oneapi::mkl::transpose transa, int64_t n, int64_t nrhs
 }
 
 template <typename fp>
-bool check_or_un_gbr_accuracy(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k,
+bool check_or_un_gbr_accuracy(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k,
                               const std::vector<fp>& Q, int64_t ldq) {
     bool result = true;
 
-    if (vect == oneapi::mkl::generate::Q) {
+    if (vect == oneapi::math::generate::Q) {
         int64_t rows_Q = m;
         int64_t cols_Q = (m >= k) ? n : m;
 
         /* | I - Q'Q | < m O(eps) */
         std::vector<fp> QQ(cols_Q * cols_Q);
         int64_t ldqq = cols_Q;
-        reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, cols_Q,
-                        cols_Q, rows_Q, 1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), ldqq);
+        reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                        cols_Q, cols_Q, rows_Q, 1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(),
+                        ldqq);
         if (!rel_id_err_check(cols_Q, QQ, ldqq)) {
             test_log::lout << "Q Orthogonality check failed" << std::endl;
             result = false;
         }
     }
-    else { /* vect == oneapi::mkl::generate::P */
+    else { /* vect == oneapi::math::generate::P */
         auto& P = Q;
         auto& ldp = ldq;
         int64_t rows_P = (k < n) ? m : n;
@@ -411,8 +413,9 @@ bool check_or_un_gbr_accuracy(oneapi::mkl::generate vect, int64_t m, int64_t n,
         /* | I - (P')(P')' | < m O(eps) */
         std::vector<fp> PP(rows_P * rows_P);
         int64_t ldpp = rows_P;
-        reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, rows_P,
-                        rows_P, cols_P, 1.0, P.data(), ldp, P.data(), ldp, 0.0, PP.data(), ldpp);
+        reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans,
+                        rows_P, rows_P, cols_P, 1.0, P.data(), ldp, P.data(), ldp, 0.0, PP.data(),
+                        ldpp);
         if (!rel_id_err_check(rows_P, PP, ldpp)) {
             test_log::lout << "P^t Orthogonality check failed" << std::endl;
             result = false;
@@ -428,7 +431,7 @@ bool check_or_un_gqr_accuracy(int64_t m, int64_t n, const std::vector<fp>& Q, in
     /* | I - Q'Q | < m O(eps) */
     std::vector<fp> QQ(n * n);
     int64_t ldqq = n;
-    reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n, n, m,
+    reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, n, m,
                     1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), ldqq);
     if (!rel_id_err_check(n, QQ, n)) {
         test_log::lout << "Orthogonality check failed" << std::endl;
@@ -444,7 +447,7 @@ bool check_or_un_gtr_accuracy(int64_t n, const std::vector<fp>& Q, int64_t ldq)
     /* | I - Q'Q | < m O(eps) */
     std::vector<fp> QQ(n * n);
     int64_t ldqq = n;
-    reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, n, n,
                     1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), ldqq);
     if (!rel_id_err_check(n, QQ, n)) {
         test_log::lout << "Orthogonality check failed" << std::endl;
@@ -455,7 +458,7 @@ bool check_or_un_gtr_accuracy(int64_t n, const std::vector<fp>& Q, int64_t ldq)
 
 template <typename fp>
 bool check_potrf_accuracy(const std::vector<fp>& init, const std::vector<fp>& sol,
-                          oneapi::mkl::uplo uplo, int64_t n, int64_t lda) {
+                          oneapi::math::uplo uplo, int64_t n, int64_t lda) {
     using fp_real = typename complex_info<fp>::real_type;
 
     std::vector<fp> ref(init);
@@ -464,7 +467,7 @@ bool check_potrf_accuracy(const std::vector<fp>& init, const std::vector<fp>& so
     fp_real eps = reference::lamch<fp_real>('e');
     fp_real error, max_error = 0;
     bool lower =
-        (uplo == oneapi::mkl::uplo::
+        (uplo == oneapi::math::uplo::
                      upper); // lower for row-major (which is this source) is upper for column major
     bool result = true;
     // Check solution values are inside allowed error bounds derived in:
@@ -490,15 +493,15 @@ bool check_potrf_accuracy(const std::vector<fp>& init, const std::vector<fp>& so
 }
 
 template <typename fp>
-bool check_potrs_accuracy(oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs, const std::vector<fp>& B,
-                          int64_t ldb, std::vector<fp> A_initial, int64_t lda,
-                          std::vector<fp> B_initial) {
+bool check_potrs_accuracy(oneapi::math::uplo uplo, int64_t n, int64_t nrhs,
+                          const std::vector<fp>& B, int64_t ldb, std::vector<fp> A_initial,
+                          int64_t lda, std::vector<fp> B_initial) {
     using fp_real = typename complex_info<fp>::real_type;
 
     hermitian_to_full(uplo, n, A_initial, lda);
     // Compute A*X - B. Store result in B_initial
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, nrhs, n,
-                    -1.0, A_initial.data(), lda, B.data(), ldb, 1.0, B_initial.data(), ldb);
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, nrhs,
+                    n, -1.0, A_initial.data(), lda, B.data(), ldb, 1.0, B_initial.data(), ldb);
 
     // Compute norm residual |A*X - B|
     fp_real norm_residual = reference::lange('1', n, nrhs, B_initial.data(), ldb);
@@ -525,7 +528,7 @@ bool check_potrs_accuracy(oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs, const
 }
 
 template <typename fp>
-bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int64_t n,
+bool check_sy_he_evd_accuracy(oneapi::math::job jobz, oneapi::math::uplo uplo, int64_t n,
                               const std::vector<fp>& A, int64_t lda,
                               const std::vector<typename complex_info<fp>::real_type>& w,
                               std::vector<fp> A_initial) {
@@ -540,10 +543,10 @@ bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int
     std::vector<fp_real> D_ref(n);
 
     if constexpr (complex_info<fp>::is_complex)
-        reference::heevd(oneapi::mkl::job::novec, uplo, n, std::vector<fp>(A_initial).data(), lda,
+        reference::heevd(oneapi::math::job::novec, uplo, n, std::vector<fp>(A_initial).data(), lda,
                          D_ref.data());
     else
-        reference::syevd(oneapi::mkl::job::novec, uplo, n, std::vector<fp>(A_initial).data(), lda,
+        reference::syevd(oneapi::math::job::novec, uplo, n, std::vector<fp>(A_initial).data(), lda,
                          D_ref.data());
 
     if (!rel_vec_err_check(n, D_ref, D, 10.0)) {
@@ -551,7 +554,7 @@ bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int
         result = false;
     }
 
-    if (oneapi::mkl::job::vec == jobz) {
+    if (oneapi::math::job::vec == jobz) {
         /* |A - Z D Z'| < |A| O(eps) */
         std::vector<fp> ZD(n * n);
         int64_t ldzd = n;
@@ -560,7 +563,7 @@ bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = 0; row < n; row++)
                 ZD[row + col * ldzd] = Z[row + col * ldz] * D[col];
-        reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, n, n,
+        reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, n, n,
                         n, 1.0, ZD.data(), ldzd, Z.data(), ldz, 0.0, ZDZ.data(), ldzdz);
 
         if (!rel_mat_err_check(n, n, A_initial, lda, ZDZ, ldzdz)) {
@@ -571,9 +574,9 @@ bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int
         /* |I - Z Z'| < n O(eps) */
         std::vector<fp> ZZ(n * n);
         int64_t ldzz = n;
-        reference::sy_he_rk(oneapi::mkl::uplo::upper, oneapi::mkl::transpose::nontrans, n, n, 1.0,
+        reference::sy_he_rk(oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, n, n, 1.0,
                             Z.data(), ldz, 0.0, ZZ.data(), ldzz);
-        hermitian_to_full(oneapi::mkl::uplo::upper, n, ZZ, ldzz);
+        hermitian_to_full(oneapi::math::uplo::upper, n, ZZ, ldzz);
         if (!rel_id_err_check(n, ZZ, ldzz)) {
             test_log::lout << "Orthogonality check failed" << std::endl;
             result = false;
@@ -583,18 +586,18 @@ bool check_sy_he_evd_accuracy(oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int
 }
 
 template <typename fp>
-bool check_trtrs_accuracy(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                          oneapi::mkl::diag diag, int64_t n, int64_t nrhs, std::vector<fp> A,
+bool check_trtrs_accuracy(oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                          oneapi::math::diag diag, int64_t n, int64_t nrhs, std::vector<fp> A,
                           int64_t lda, const std::vector<fp>& B, int64_t ldb,
                           const std::vector<fp>& B_initial) {
     using fp_real = typename complex_info<fp>::real_type;
     fp_real threshold = 10.0;
 
     /* |A x - b| = |A (x-x_0)| < |A| |x-x0| < |A| |x| cond(A) O(eps) */
-    if (diag == oneapi::mkl::diag::unit)
+    if (diag == oneapi::math::diag::unit)
         for (int64_t d = 0; d < n; d++)
             A[d + d * lda] = 1.0;
-    if (uplo == oneapi::mkl::uplo::upper)
+    if (uplo == oneapi::math::uplo::upper)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = col + 1; row < n; row++)
                 A[row + col * lda] = 0.0;
@@ -607,7 +610,7 @@ bool check_trtrs_accuracy(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
     auto norm_x = reference::lange('I', n, nrhs, B.data(), ldb);
 
     fp_real cond_A;
-    if (diag == oneapi::mkl::diag::unit)
+    if (diag == oneapi::math::diag::unit)
         cond_A = 1.0;
     else {
         fp_real min = std::abs(A[0]);
@@ -626,7 +629,7 @@ bool check_trtrs_accuracy(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
 
     std::vector<fp> residual(n * nrhs);
     int64_t ldr = n;
-    reference::gemm(trans, oneapi::mkl::transpose::nontrans, n, nrhs, n, 1.0, A.data(), lda,
+    reference::gemm(trans, oneapi::math::transpose::nontrans, n, nrhs, n, 1.0, A.data(), lda,
                     B.data(), ldb, 0.0, residual.data(), ldr);
     for (int64_t col = 0; col < nrhs; col++)
         for (int64_t row = 0; row < n; row++)
diff --git a/tests/unit_tests/lapack/include/lapack_common.hpp b/tests/unit_tests/lapack/include/lapack_common.hpp
index 1cebb7553..5df950392 100644
--- a/tests/unit_tests/lapack/include/lapack_common.hpp
+++ b/tests/unit_tests/lapack/include/lapack_common.hpp
@@ -31,7 +31,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/types.hpp"
+#include "oneapi/math/types.hpp"
 
 namespace test_log {
 
@@ -123,9 +123,9 @@ inline std::complex<double> rand_scalar(uint64_t& seed) {
 }
 
 template <typename fp>
-void rand_matrix(uint64_t& seed, oneapi::mkl::transpose trans, int64_t m, int64_t n,
+void rand_matrix(uint64_t& seed, oneapi::math::transpose trans, int64_t m, int64_t n,
                  std::vector<fp>& M, int64_t ld, int64_t offset = 0) {
-    if (trans == oneapi::mkl::transpose::nontrans)
+    if (trans == oneapi::math::transpose::nontrans)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = 0; row < m; row++)
                 M[offset + row + col * ld] = rand_scalar<fp>(seed);
@@ -136,12 +136,12 @@ void rand_matrix(uint64_t& seed, oneapi::mkl::transpose trans, int64_t m, int64_
 }
 
 template <typename fp>
-void rand_matrix_diag_dom(uint64_t& seed, oneapi::mkl::transpose trans, int64_t m, int64_t n,
+void rand_matrix_diag_dom(uint64_t& seed, oneapi::math::transpose trans, int64_t m, int64_t n,
                           std::vector<fp>& M, int64_t ld, int64_t offset = 0) {
     using fp_real = typename complex_info<fp>::real_type;
     int64_t minsh;
     minsh = std::min(m, n);
-    if (trans == oneapi::mkl::transpose::nontrans)
+    if (trans == oneapi::math::transpose::nontrans)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = 0; row < m; row++) {
                 M[offset + row + col * ld] = rand_scalar<fp>(seed);
@@ -158,11 +158,11 @@ void rand_matrix_diag_dom(uint64_t& seed, oneapi::mkl::transpose trans, int64_t
 }
 
 template <typename fp>
-void rand_symmetric_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& M,
+void rand_symmetric_matrix(uint64_t& seed, oneapi::math::uplo uplo, int64_t n, std::vector<fp>& M,
                            int64_t ld, int64_t offset = 0) {
     using fp_real = typename complex_info<fp>::real_type;
 
-    if (uplo == oneapi::mkl::uplo::upper)
+    if (uplo == oneapi::math::uplo::upper)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = 0; row <= col; row++)
                 M[offset + row + col * ld] = rand_scalar<fp>(seed);
@@ -173,7 +173,7 @@ void rand_symmetric_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, st
 }
 
 template <typename fp>
-void rand_hermitian_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& M,
+void rand_hermitian_matrix(uint64_t& seed, oneapi::math::uplo uplo, int64_t n, std::vector<fp>& M,
                            int64_t ld, int64_t offset = 0) {
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -183,7 +183,7 @@ void rand_hermitian_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, st
 }
 
 template <typename fp>
-void rand_pos_def_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& M,
+void rand_pos_def_matrix(uint64_t& seed, oneapi::math::uplo uplo, int64_t n, std::vector<fp>& M,
                          int64_t ld, int64_t offset = 0) {
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -194,8 +194,8 @@ void rand_pos_def_matrix(uint64_t& seed, oneapi::mkl::uplo uplo, int64_t n, std:
 }
 
 template <typename fp>
-void symmetric_to_full(oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& A, int64_t lda) {
-    if (oneapi::mkl::uplo::upper == uplo)
+void symmetric_to_full(oneapi::math::uplo uplo, int64_t n, std::vector<fp>& A, int64_t lda) {
+    if (oneapi::math::uplo::upper == uplo)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = col + 1; row < n; row++)
                 A[row + col * lda] = A[col + row * lda];
@@ -207,10 +207,10 @@ void symmetric_to_full(oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& A, in
 }
 
 template <typename fp>
-void hermitian_to_full(oneapi::mkl::uplo uplo, int64_t n, std::vector<fp>& A, int64_t lda) {
+void hermitian_to_full(oneapi::math::uplo uplo, int64_t n, std::vector<fp>& A, int64_t lda) {
     for (int64_t diag = 0; diag < n; diag++)
         A[diag + diag * lda] = get_real(A[diag + diag * lda]);
-    if (oneapi::mkl::uplo::upper == uplo)
+    if (oneapi::math::uplo::upper == uplo)
         for (int64_t col = 0; col < n; col++)
             for (int64_t row = col + 1; row < n; row++)
                 A[row + col * lda] = get_conj(A[col + row * lda]);
diff --git a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp
index 07ce554e8..2f500274c 100644
--- a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp
+++ b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp
@@ -27,247 +27,242 @@
 #endif
 #define WeirdNEC
 extern "C" {
-#ifdef USE_MKLREF
-#include "mkl_cblas.h"
-#include "mkl_lapacke.h"
-#else
 #include "cblas.h"
 #include "lapacke.h"
-#endif
 }
 static_assert(sizeof(lapack_int) == 8);
 static_assert(sizeof(CBLAS_INT) == 8);
 
 namespace reference {
-inline CBLAS_TRANSPOSE cblas_trans(oneapi::mkl::transpose t) {
-    if (t == oneapi::mkl::transpose::nontrans)
+inline CBLAS_TRANSPOSE cblas_trans(oneapi::math::transpose t) {
+    if (t == oneapi::math::transpose::nontrans)
         return CblasNoTrans;
-    if (t == oneapi::mkl::transpose::trans)
+    if (t == oneapi::math::transpose::trans)
         return CblasTrans;
-    if (t == oneapi::mkl::transpose::conjtrans)
+    if (t == oneapi::math::transpose::conjtrans)
         return CblasConjTrans;
     return CblasNoTrans;
 }
-inline CBLAS_UPLO cblas_uplo(oneapi::mkl::uplo u) {
-    if (u == oneapi::mkl::uplo::upper)
+inline CBLAS_UPLO cblas_uplo(oneapi::math::uplo u) {
+    if (u == oneapi::math::uplo::upper)
         return CblasUpper;
-    if (u == oneapi::mkl::uplo::lower)
+    if (u == oneapi::math::uplo::lower)
         return CblasLower;
     return CblasUpper;
 }
-inline CBLAS_DIAG cblas_diag(oneapi::mkl::diag d) {
-    if (d == oneapi::mkl::diag::nonunit)
+inline CBLAS_DIAG cblas_diag(oneapi::math::diag d) {
+    if (d == oneapi::math::diag::nonunit)
         return CblasNonUnit;
-    if (d == oneapi::mkl::diag::unit)
+    if (d == oneapi::math::diag::unit)
         return CblasUnit;
     return CblasNonUnit;
 }
 inline CBLAS_SIDE cblas_side(const char* c) {
     return *c == 'R' || *c == 'r' ? CblasRight : CblasLeft;
 }
-inline CBLAS_SIDE cblas_side(oneapi::mkl::side s) {
-    if (s == oneapi::mkl::side::left)
+inline CBLAS_SIDE cblas_side(oneapi::math::side s) {
+    if (s == oneapi::math::side::left)
         return CblasLeft;
-    if (s == oneapi::mkl::side::right)
+    if (s == oneapi::math::side::right)
         return CblasRight;
     return CblasLeft;
 }
-inline char to_char(oneapi::mkl::transpose t) {
-    if (t == oneapi::mkl::transpose::nontrans)
+inline char to_char(oneapi::math::transpose t) {
+    if (t == oneapi::math::transpose::nontrans)
         return 'N';
-    if (t == oneapi::mkl::transpose::trans)
+    if (t == oneapi::math::transpose::trans)
         return 'T';
-    if (t == oneapi::mkl::transpose::conjtrans)
+    if (t == oneapi::math::transpose::conjtrans)
         return 'C';
     return 'N';
 }
-inline char to_char(oneapi::mkl::offset t) {
-    if (t == oneapi::mkl::offset::fix)
+inline char to_char(oneapi::math::offset t) {
+    if (t == oneapi::math::offset::fix)
         return 'F';
-    if (t == oneapi::mkl::offset::row)
+    if (t == oneapi::math::offset::row)
         return 'R';
-    if (t == oneapi::mkl::offset::column)
+    if (t == oneapi::math::offset::column)
         return 'C';
     return 'N';
 }
 
-inline char to_char(oneapi::mkl::uplo u) {
-    if (u == oneapi::mkl::uplo::upper)
+inline char to_char(oneapi::math::uplo u) {
+    if (u == oneapi::math::uplo::upper)
         return 'U';
-    if (u == oneapi::mkl::uplo::lower)
+    if (u == oneapi::math::uplo::lower)
         return 'L';
     return 'U';
 }
 
-inline char to_char(oneapi::mkl::diag d) {
-    if (d == oneapi::mkl::diag::nonunit)
+inline char to_char(oneapi::math::diag d) {
+    if (d == oneapi::math::diag::nonunit)
         return 'N';
-    if (d == oneapi::mkl::diag::unit)
+    if (d == oneapi::math::diag::unit)
         return 'U';
     return 'N';
 }
 
-inline char to_char(oneapi::mkl::side s) {
-    if (s == oneapi::mkl::side::left)
+inline char to_char(oneapi::math::side s) {
+    if (s == oneapi::math::side::left)
         return 'L';
-    if (s == oneapi::mkl::side::right)
+    if (s == oneapi::math::side::right)
         return 'R';
     return 'L';
 }
 
-inline char to_char(oneapi::mkl::job j) {
-    if (j == oneapi::mkl::job::novec)
+inline char to_char(oneapi::math::job j) {
+    if (j == oneapi::math::job::novec)
         return 'N';
-    if (j == oneapi::mkl::job::vec)
+    if (j == oneapi::math::job::vec)
         return 'V';
-    if (j == oneapi::mkl::job::updatevec)
+    if (j == oneapi::math::job::updatevec)
         return 'U';
-    if (j == oneapi::mkl::job::allvec)
+    if (j == oneapi::math::job::allvec)
         return 'A';
-    if (j == oneapi::mkl::job::somevec)
+    if (j == oneapi::math::job::somevec)
         return 'S';
-    if (j == oneapi::mkl::job::overwritevec)
+    if (j == oneapi::math::job::overwritevec)
         return 'O';
     return 'N';
 }
-inline char to_char(oneapi::mkl::jobsvd j) {
-    if (j == oneapi::mkl::jobsvd::novec)
+inline char to_char(oneapi::math::jobsvd j) {
+    if (j == oneapi::math::jobsvd::novec)
         return 'N';
-    if (j == oneapi::mkl::jobsvd::vectors)
+    if (j == oneapi::math::jobsvd::vectors)
         return 'A';
-    if (j == oneapi::mkl::jobsvd::vectorsina)
+    if (j == oneapi::math::jobsvd::vectorsina)
         return 'O';
-    if (j == oneapi::mkl::jobsvd::somevec)
+    if (j == oneapi::math::jobsvd::somevec)
         return 'S';
     return 'N';
 }
-inline char to_char(oneapi::mkl::generate v) {
-    if (v == oneapi::mkl::generate::Q)
+inline char to_char(oneapi::math::generate v) {
+    if (v == oneapi::math::generate::Q)
         return 'Q';
-    if (v == oneapi::mkl::generate::P)
+    if (v == oneapi::math::generate::P)
         return 'P';
     return 'Q';
 }
 
-inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n,
-                 int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb,
-                 float beta, float* c, int64_t ldc) {
+inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m,
+                 int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b,
+                 int64_t ldb, float beta, float* c, int64_t ldc) {
     cblas_sgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b,
                 ldb, beta, c, ldc);
 }
-inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n,
-                 int64_t k, double alpha, const double* a, int64_t lda, const double* b,
+inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m,
+                 int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b,
                  int64_t ldb, double beta, double* c, int64_t ldc) {
     cblas_dgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b,
                 ldb, beta, c, ldc);
 }
-inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n,
-                 int64_t k, std::complex<float> alpha, const std::complex<float>* a, int64_t lda,
-                 const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
+inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m,
+                 int64_t n, int64_t k, std::complex<float> alpha, const std::complex<float>* a,
+                 int64_t lda, const std::complex<float>* b, int64_t ldb, std::complex<float> beta,
                  std::complex<float>* c, int64_t ldc) {
     cblas_cgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha,
                 (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc);
 }
-inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n,
-                 int64_t k, std::complex<double> alpha, const std::complex<double>* a, int64_t lda,
-                 const std::complex<double>* b, int64_t ldb, std::complex<double> beta,
+inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m,
+                 int64_t n, int64_t k, std::complex<double> alpha, const std::complex<double>* a,
+                 int64_t lda, const std::complex<double>* b, int64_t ldb, std::complex<double> beta,
                  std::complex<double>* c, int64_t ldc) {
     cblas_zgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha,
                 (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc);
 }
 
-inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda,
+inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, float* a, int64_t lda,
                      float* w) {
     return LAPACKE_ssyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w);
 }
-inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda,
+inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, double* a, int64_t lda,
                      double* w) {
     return LAPACKE_dsyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w);
 }
 
-inline int64_t sygvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float* a,
+inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, float* a,
                      int64_t lda, float* b, int64_t ldb, float* w) {
     return LAPACKE_ssygvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, a, lda, b, ldb, w);
 }
-inline int64_t sygvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double* a,
+inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, double* a,
                      int64_t lda, double* b, int64_t ldb, double* w) {
     return LAPACKE_dsygvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, a, lda, b, ldb, w);
 }
 
-inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, float alpha,
+inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, float alpha,
                  const float* a, int64_t lda, float beta, float* c, int64_t ldc) {
     cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc);
 }
-inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, double alpha,
-                 const double* a, int64_t lda, double beta, double* c, int64_t ldc) {
+inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
+                 double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc) {
     cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc);
 }
-inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                  std::complex<float> alpha, const std::complex<float>* a, int64_t lda,
                  std::complex<float> beta, std::complex<float>* c, int64_t ldc) {
     cblas_csyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda,
                 (void*)&beta, (void*)c, ldc);
 }
-inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                  std::complex<double> alpha, const std::complex<double>* a, int64_t lda,
                  std::complex<double> beta, std::complex<double>* c, int64_t ldc) {
     cblas_zsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda,
                 (void*)&beta, (void*)c, ldc);
 }
-inline void herk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, float alpha,
+inline void herk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, float alpha,
                  const std::complex<float>* a, int64_t lda, float beta, std::complex<float>* c,
                  int64_t ldc) {
     cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c,
                 ldc);
 }
-inline void herk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, double alpha,
-                 const std::complex<double>* a, int64_t lda, double beta, std::complex<double>* c,
-                 int64_t ldc) {
+inline void herk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
+                 double alpha, const std::complex<double>* a, int64_t lda, double beta,
+                 std::complex<double>* c, int64_t ldc) {
     cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c,
                 ldc);
 }
-inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                      float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc) {
     cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc);
 }
-inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                      double alpha, const double* a, int64_t lda, double beta, double* c,
                      int64_t ldc) {
     cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc);
 }
-inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                      float alpha, const std::complex<float>* a, int64_t lda, float beta,
                      std::complex<float>* c, int64_t ldc) {
     cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c,
                 ldc);
 }
-inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k,
+inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k,
                      double alpha, const std::complex<double>* a, int64_t lda, double beta,
                      std::complex<double>* c, int64_t ldc) {
     cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c,
                 ldc);
 }
 
-inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa,
-                 oneapi::mkl::diag diag, int64_t m, int64_t n, float alpha, const float* a,
+inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa,
+                 oneapi::math::diag diag, int64_t m, int64_t n, float alpha, const float* a,
                  int64_t lda, float* b, int64_t ldb) {
     cblas_strmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa),
                 cblas_diag(diag), m, n, alpha, a, lda, b, ldb);
 }
-inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa,
-                 oneapi::mkl::diag diag, int64_t m, int64_t n, double alpha, const double* a,
+inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa,
+                 oneapi::math::diag diag, int64_t m, int64_t n, double alpha, const double* a,
                  int64_t lda, double* b, int64_t ldb) {
     cblas_dtrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa),
                 cblas_diag(diag), m, n, alpha, a, lda, b, ldb);
 }
-inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa,
-                 oneapi::mkl::diag diag, int64_t m, int64_t n, std::complex<float> alpha,
+inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa,
+                 oneapi::math::diag diag, int64_t m, int64_t n, std::complex<float> alpha,
                  const std::complex<float>* a, int64_t lda, std::complex<float>* b, int64_t ldb) {
     cblas_ctrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa),
                 cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb);
 }
-inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa,
-                 oneapi::mkl::diag diag, int64_t m, int64_t n, std::complex<double> alpha,
+inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa,
+                 oneapi::math::diag diag, int64_t m, int64_t n, std::complex<double> alpha,
                  const std::complex<double>* a, int64_t lda, std::complex<double>* b, int64_t ldb) {
     cblas_ztrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa),
                 cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb);
@@ -314,29 +309,29 @@ inline double lange(char norm, int64_t m, int64_t n, const std::complex<double>*
                           reinterpret_cast<const lapack_complex_double*>(a), lda);
 }
 
-inline float lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex<float>* a,
+inline float lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex<float>* a,
                    int64_t lda) {
     return LAPACKE_clanhe(LAPACK_COL_MAJOR, norm, to_char(u), n,
                           reinterpret_cast<const lapack_complex_float*>(a), lda);
 }
-inline double lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex<double>* a,
+inline double lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex<double>* a,
                     int64_t lda) {
     return LAPACKE_zlanhe(LAPACK_COL_MAJOR, norm, to_char(u), n,
                           reinterpret_cast<const lapack_complex_double*>(a), lda);
 }
 
-inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex<float>* a,
+inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const std::complex<float>* a,
                    int64_t lda) {
     return LAPACKE_clansy(LAPACK_COL_MAJOR, norm, to_char(u), n,
                           reinterpret_cast<const lapack_complex_float*>(a), lda);
 }
-inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const double* a, int64_t lda) {
+inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const double* a, int64_t lda) {
     return LAPACKE_dlansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda);
 }
-inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const float* a, int64_t lda) {
+inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const float* a, int64_t lda) {
     return LAPACKE_slansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda);
 }
-inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex<double>* a,
+inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const std::complex<double>* a,
                     int64_t lda) {
     return LAPACKE_zlansy(LAPACK_COL_MAJOR, norm, to_char(u), n,
                           reinterpret_cast<const lapack_complex_double*>(a), lda);
@@ -362,43 +357,43 @@ inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex<double>* a
                           reinterpret_cast<const lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_complex_double*>(b), ldb);
 }
-inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex<float>* a,
+inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex<float>* a,
                      int64_t lda, std::complex<float>* b, int64_t ldb) {
     return LAPACKE_clacpy(LAPACK_COL_MAJOR, to_char(u), m, n,
                           reinterpret_cast<const lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_complex_float*>(b), ldb);
 }
-inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const double* a, int64_t lda,
+inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const double* a, int64_t lda,
                      double* b, int64_t ldb) {
     return LAPACKE_dlacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb);
 }
-inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const float* a, int64_t lda,
+inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const float* a, int64_t lda,
                      float* b, int64_t ldb) {
     return LAPACKE_slacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb);
 }
-inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex<double>* a,
+inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex<double>* a,
                      int64_t lda, std::complex<double>* b, int64_t ldb) {
     return LAPACKE_zlacpy(LAPACK_COL_MAJOR, to_char(u), m, n,
                           reinterpret_cast<const lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_complex_double*>(b), ldb);
 }
 
-inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, std::complex<float> alpha,
+inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, std::complex<float> alpha,
                      std::complex<float> beta, std::complex<float>* a, int64_t lda) {
     return LAPACKE_claset(LAPACK_COL_MAJOR, to_char(u), m, n,
                           reinterpret_cast<lapack_complex_float&>(alpha),
                           reinterpret_cast<lapack_complex_float&>(beta),
                           reinterpret_cast<lapack_complex_float*>(a), lda);
 }
-inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, double alpha, double beta,
+inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, double alpha, double beta,
                      double* a, int64_t lda) {
     return LAPACKE_dlaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda);
 }
-inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, float alpha, float beta, float* a,
+inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, float alpha, float beta, float* a,
                      int64_t lda) {
     return LAPACKE_slaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda);
 }
-inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, std::complex<double> alpha,
+inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, std::complex<double> alpha,
                      std::complex<double> beta, std::complex<double>* a, int64_t lda) {
     return LAPACKE_zlaset(LAPACK_COL_MAJOR, to_char(u), m, n,
                           reinterpret_cast<lapack_complex_double&>(alpha),
@@ -481,7 +476,7 @@ inline int64_t gerqf(int64_t m, int64_t n, std::complex<double>* a, int64_t lda,
                           reinterpret_cast<lapack_complex_double*>(tau));
 }
 
-inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n,
+inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n,
                      std::complex<float>* a, int64_t lda, float* s, std::complex<float>* u,
                      int64_t ldu, std::complex<float>* vt, int64_t ldvt, float* superb) {
     return LAPACKE_cgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n,
@@ -489,19 +484,19 @@ inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_
                           reinterpret_cast<lapack_complex_float*>(u), ldu,
                           reinterpret_cast<lapack_complex_float*>(vt), ldvt, superb);
 }
-inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n,
+inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n,
                      double* a, int64_t lda, double* s, double* u, int64_t ldu, double* vt,
                      int64_t ldvt, double* superb) {
     return LAPACKE_dgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu,
                           vt, ldvt, superb);
 }
-inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n,
+inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n,
                      float* a, int64_t lda, float* s, float* u, int64_t ldu, float* vt,
                      int64_t ldvt, float* superb) {
     return LAPACKE_sgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu,
                           vt, ldvt, superb);
 }
-inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n,
+inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n,
                      std::complex<double>* a, int64_t lda, double* s, std::complex<double>* u,
                      int64_t ldu, std::complex<double>* vt, int64_t ldvt, double* superb) {
     return LAPACKE_zgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n,
@@ -525,25 +520,25 @@ inline int64_t getrf(int64_t m, int64_t n, std::complex<double>* a, int64_t lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
 
-inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex<float>* a,
+inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex<float>* a,
                      int64_t lda, float* w) {
     return LAPACKE_cheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda, w);
 }
-inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex<double>* a,
+inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex<double>* a,
                      int64_t lda, double* w) {
     return LAPACKE_zheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda, w);
 }
 
-inline int64_t hegvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n,
+inline int64_t hegvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n,
                      std::complex<float>* a, int64_t lda, std::complex<float>* b, int64_t ldb,
                      float* w) {
     return LAPACKE_chegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_complex_float*>(b), ldb, w);
 }
-inline int64_t hegvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n,
+inline int64_t hegvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n,
                      std::complex<double>* a, int64_t lda, std::complex<double>* b, int64_t ldb,
                      double* w) {
     return LAPACKE_zhegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n,
@@ -551,46 +546,46 @@ inline int64_t hegvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int
                           reinterpret_cast<lapack_complex_double*>(b), ldb, w);
 }
 
-inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex<float>* a, int64_t lda, float* d,
+inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex<float>* a, int64_t lda, float* d,
                      float* e, std::complex<float>* tau) {
     return LAPACKE_chetrd(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda, d, e,
                           reinterpret_cast<lapack_complex_float*>(tau));
 }
-inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
+inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
                      double* d, double* e, std::complex<double>* tau) {
     return LAPACKE_zhetrd(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda, d, e,
                           reinterpret_cast<lapack_complex_double*>(tau));
 }
 
-inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
+inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
                      int64_t* ipiv) {
     return LAPACKE_chetrf(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
-inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
+inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
                      int64_t* ipiv) {
     return LAPACKE_zhetrf(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
 
-inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
+inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
                      const std::complex<float>* tau) {
     return LAPACKE_cungtr(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda,
                           reinterpret_cast<const lapack_complex_float*>(tau));
 }
-inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
+inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
                      const std::complex<double>* tau) {
     return LAPACKE_zungtr(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda,
                           reinterpret_cast<const lapack_complex_double*>(tau));
 }
 
-inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
+inline int64_t unmtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans,
                      int64_t m, int64_t n, const std::complex<float>* a, int64_t lda,
                      const std::complex<float>* tau, std::complex<float>* c, int64_t ldc) {
     return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n,
@@ -598,7 +593,7 @@ inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::t
                           reinterpret_cast<const lapack_complex_float*>(tau),
                           reinterpret_cast<lapack_complex_float*>(c), ldc);
 }
-inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
+inline int64_t unmtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans,
                      int64_t m, int64_t n, const std::complex<double>* a, int64_t lda,
                      const std::complex<double>* tau, std::complex<double>* c, int64_t ldc) {
     return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n,
@@ -607,90 +602,92 @@ inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::t
                           reinterpret_cast<lapack_complex_double*>(c), ldc);
 }
 
-inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, const double* tau) {
+inline int64_t orgtr(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, const double* tau) {
     return LAPACKE_dorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau);
 }
-inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, const float* tau) {
+inline int64_t orgtr(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, const float* tau) {
     return LAPACKE_sorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau);
 }
 
-inline int64_t ormtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
+inline int64_t ormtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans,
                      int64_t m, int64_t n, float* a, int64_t lda, const float* tau, float* c,
                      int64_t ldc) {
     return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda,
                           tau, c, ldc);
 }
-inline int64_t ormtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
+inline int64_t ormtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans,
                      int64_t m, int64_t n, double* a, int64_t lda, const double* tau, double* c,
                      int64_t ldc) {
     return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda,
                           tau, c, ldc);
 }
 
-inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
-                         int64_t m, int64_t n, float* a, int64_t lda, const float* tau, float* c,
-                         int64_t ldc) {
+inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u,
+                         oneapi::math::transpose trans, int64_t m, int64_t n, float* a, int64_t lda,
+                         const float* tau, float* c, int64_t ldc) {
     return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda,
                           tau, c, ldc);
 }
-inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
-                         int64_t m, int64_t n, double* a, int64_t lda, const double* tau, double* c,
-                         int64_t ldc) {
+inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u,
+                         oneapi::math::transpose trans, int64_t m, int64_t n, double* a,
+                         int64_t lda, const double* tau, double* c, int64_t ldc) {
     return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda,
                           tau, c, ldc);
 }
-inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
-                         int64_t m, int64_t n, std::complex<float>* a, int64_t lda,
-                         std::complex<float>* tau, std::complex<float>* c, int64_t ldc) {
+inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u,
+                         oneapi::math::transpose trans, int64_t m, int64_t n,
+                         std::complex<float>* a, int64_t lda, std::complex<float>* tau,
+                         std::complex<float>* c, int64_t ldc) {
     return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n,
                           reinterpret_cast<lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_complex_float*>(tau),
                           reinterpret_cast<lapack_complex_float*>(c), ldc);
 }
-inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans,
-                         int64_t m, int64_t n, std::complex<double>* a, int64_t lda,
-                         std::complex<double>* tau, std::complex<double>* c, int64_t ldc) {
+inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u,
+                         oneapi::math::transpose trans, int64_t m, int64_t n,
+                         std::complex<double>* a, int64_t lda, std::complex<double>* tau,
+                         std::complex<double>* c, int64_t ldc) {
     return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n,
                           reinterpret_cast<lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_complex_double*>(tau),
                           reinterpret_cast<lapack_complex_double*>(c), ldc);
 }
 
-inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, float* d, float* e,
+inline int64_t sytrd(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, float* d, float* e,
                      float* tau) {
     return LAPACKE_ssytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau);
 }
-inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, double* d, double* e,
+inline int64_t sytrd(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, double* d, double* e,
                      double* tau) {
     return LAPACKE_dsytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau);
 }
 
-inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, int64_t* ipiv) {
+inline int64_t sytrf(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, int64_t* ipiv) {
     return LAPACKE_ssytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
-inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, int64_t* ipiv) {
+inline int64_t sytrf(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, int64_t* ipiv) {
     return LAPACKE_dsytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
-inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
+inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex<float>* a, int64_t lda,
                      int64_t* ipiv) {
     return LAPACKE_csytrf(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
-inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
+inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex<double>* a, int64_t lda,
                      int64_t* ipiv) {
     return LAPACKE_zsytrf(LAPACK_COL_MAJOR, to_char(u), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_int*>(ipiv));
 }
 
-inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, double* a,
+inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, double* a,
                   int64_t lda, const double* tau) {
     LAPACKE_dorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau);
 }
-inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, float* a,
+inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, float* a,
                   int64_t lda, const float* tau) {
     LAPACKE_sorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau);
 }
@@ -713,17 +710,17 @@ inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex<double>*
                           lda, reinterpret_cast<const lapack_complex_double*>(tau));
 }
 
-inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const float* a, int64_t lda, const float* tau, float* c,
                          int64_t ldc) {
     return LAPACKE_sormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc);
 }
-inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const double* a, int64_t lda, const double* tau, double* c,
                          int64_t ldc) {
     return LAPACKE_dormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc);
 }
-inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const std::complex<float>* a, int64_t lda,
                          const std::complex<float>* tau, std::complex<float>* c, int64_t ldc) {
     return LAPACKE_cunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k,
@@ -731,7 +728,7 @@ inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t
                           reinterpret_cast<const lapack_complex_float*>(tau),
                           reinterpret_cast<lapack_complex_float*>(c), ldc);
 }
-inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const std::complex<double>* a, int64_t lda,
                          const std::complex<double>* tau, std::complex<double>* c, int64_t ldc) {
     return LAPACKE_zunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k,
@@ -758,17 +755,17 @@ inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex<double>*
                           lda, reinterpret_cast<const lapack_complex_double*>(tau));
 }
 
-inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const float* a, int64_t lda, const float* tau, float* c,
                          int64_t ldc) {
     return LAPACKE_sormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc);
 }
-inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const double* a, int64_t lda, const double* tau, double* c,
                          int64_t ldc) {
     return LAPACKE_dormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc);
 }
-inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const std::complex<float>* a, int64_t lda,
                          const std::complex<float>* tau, std::complex<float>* c, int64_t ldc) {
     return LAPACKE_cunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k,
@@ -776,7 +773,7 @@ inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t
                           reinterpret_cast<const lapack_complex_float*>(tau),
                           reinterpret_cast<lapack_complex_float*>(c), ldc);
 }
-inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n,
+inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n,
                          int64_t k, const std::complex<double>* a, int64_t lda,
                          const std::complex<double>* tau, std::complex<double>* c, int64_t ldc) {
     return LAPACKE_zunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k,
@@ -785,39 +782,39 @@ inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t
                           reinterpret_cast<lapack_complex_double*>(c), ldc);
 }
 
-inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex<float>* a,
+inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex<float>* a,
                      int64_t lda) {
     return LAPACKE_cpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda);
 }
-inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, double* a, int64_t lda) {
+inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, double* a, int64_t lda) {
     return LAPACKE_dpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda);
 }
-inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, float* a, int64_t lda) {
+inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, float* a, int64_t lda) {
     return LAPACKE_spotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda);
 }
-inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex<double>* a,
+inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex<double>* a,
                      int64_t lda) {
     return LAPACKE_zpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda);
 }
 
-inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs,
+inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs,
                      const std::complex<float>* a, int64_t lda, std::complex<float>* b,
                      int64_t ldb) {
     return LAPACKE_cpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs,
                           reinterpret_cast<const lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_complex_float*>(b), ldb);
 }
-inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const double* a,
+inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const double* a,
                      int64_t lda, double* b, int64_t ldb) {
     return LAPACKE_dpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb);
 }
-inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const float* a,
+inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const float* a,
                      int64_t lda, float* b, int64_t ldb) {
     return LAPACKE_spotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb);
 }
-inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs,
+inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs,
                      const std::complex<double>* a, int64_t lda, std::complex<double>* b,
                      int64_t ldb) {
     return LAPACKE_zpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs,
@@ -825,18 +822,18 @@ inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs,
                           reinterpret_cast<lapack_complex_double*>(b), ldb);
 }
 
-inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex<float>* a,
+inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex<float>* a,
                      int64_t lda) {
     return LAPACKE_cpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n,
                           reinterpret_cast<lapack_complex_float*>(a), lda);
 }
-inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, double* a, int64_t lda) {
+inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, double* a, int64_t lda) {
     return LAPACKE_dpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda);
 }
-inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, float* a, int64_t lda) {
+inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, float* a, int64_t lda) {
     return LAPACKE_spotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda);
 }
-inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex<double>* a,
+inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex<double>* a,
                      int64_t lda) {
     return LAPACKE_zpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n,
                           reinterpret_cast<lapack_complex_double*>(a), lda);
@@ -863,40 +860,42 @@ inline int64_t laswp(int64_t n, std::complex<double>* a, int64_t lda, int64_t k1
                           k2, reinterpret_cast<const lapack_int*>(ipiv), incx);
 }
 
-inline void ungbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k,
+inline void ungbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k,
                   std::complex<float>* a, int64_t lda, const std::complex<float>* tau) {
     LAPACKE_cungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k,
                    reinterpret_cast<lapack_complex_float*>(a), lda,
                    reinterpret_cast<const lapack_complex_float*>(tau));
 }
-inline void ungbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k,
+inline void ungbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k,
                   std::complex<double>* a, int64_t lda, const std::complex<double>* tau) {
     LAPACKE_zungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k,
                    reinterpret_cast<lapack_complex_double*>(a), lda,
                    reinterpret_cast<const lapack_complex_double*>(tau));
 }
 
-inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                     int64_t n, int64_t nrhs, const float* a, int64_t lda, float* b, int64_t ldb) {
+inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                     oneapi::math::diag diag, int64_t n, int64_t nrhs, const float* a, int64_t lda,
+                     float* b, int64_t ldb) {
     return LAPACKE_strtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs,
                           a, lda, b, ldb);
 }
-inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                     int64_t n, int64_t nrhs, const double* a, int64_t lda, double* b,
-                     int64_t ldb) {
+inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                     oneapi::math::diag diag, int64_t n, int64_t nrhs, const double* a, int64_t lda,
+                     double* b, int64_t ldb) {
     return LAPACKE_dtrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs,
                           a, lda, b, ldb);
 }
-inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                     int64_t n, int64_t nrhs, const std::complex<float>* a, int64_t lda,
-                     std::complex<float>* b, int64_t ldb) {
+inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                     oneapi::math::diag diag, int64_t n, int64_t nrhs, const std::complex<float>* a,
+                     int64_t lda, std::complex<float>* b, int64_t ldb) {
     return LAPACKE_ctrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs,
                           reinterpret_cast<const lapack_complex_float*>(a), lda,
                           reinterpret_cast<lapack_complex_float*>(b), ldb);
 }
-inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag,
-                     int64_t n, int64_t nrhs, const std::complex<double>* a, int64_t lda,
-                     std::complex<double>* b, int64_t ldb) {
+inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                     oneapi::math::diag diag, int64_t n, int64_t nrhs,
+                     const std::complex<double>* a, int64_t lda, std::complex<double>* b,
+                     int64_t ldb) {
     return LAPACKE_ztrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs,
                           reinterpret_cast<const lapack_complex_double*>(a), lda,
                           reinterpret_cast<lapack_complex_double*>(b), ldb);
diff --git a/tests/unit_tests/lapack/include/lapack_test_controller.hpp b/tests/unit_tests/lapack/include/lapack_test_controller.hpp
index 918060959..3e21fa104 100644
--- a/tests/unit_tests/lapack/include/lapack_test_controller.hpp
+++ b/tests/unit_tests/lapack/include/lapack_test_controller.hpp
@@ -31,7 +31,7 @@
 #endif
 
 #include "lapack_common.hpp"
-#include "oneapi/mkl/exceptions.hpp"
+#include "oneapi/math/exceptions.hpp"
 
 template <class T>
 std::istream& operator>>(std::istream& is, T& t) {
@@ -40,31 +40,31 @@ std::istream& operator>>(std::istream& is, T& t) {
     t = static_cast<T>(i);
     return is;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::job& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::job& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::jobsvd& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::jobsvd& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::transpose& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::transpose& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::uplo& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::uplo& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::side& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::side& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::diag& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::diag& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
-inline std::ostream& operator<<(std::ostream& os, const oneapi::mkl::generate& t) {
+inline std::ostream& operator<<(std::ostream& os, const oneapi::math::generate& t) {
     os << static_cast<int64_t>(t);
     return os;
 }
@@ -173,10 +173,10 @@ struct InputTestController {
         try {
             result = std::apply(tp, tp_args);
         }
-        catch (const oneapi::mkl::unsupported_device& e) {
+        catch (const oneapi::math::unsupported_device& e) {
             result = result_T{ e, result_T::result::pass };
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             result = result_T{ e, result_T::result::pass };
         }
         catch (const std::exception& e) {
diff --git a/tests/unit_tests/lapack/source/CMakeLists.txt b/tests/unit_tests/lapack/source/CMakeLists.txt
index ea9d3140a..f1443440a 100644
--- a/tests/unit_tests/lapack/source/CMakeLists.txt
+++ b/tests/unit_tests/lapack/source/CMakeLists.txt
@@ -86,7 +86,7 @@ if(BUILD_SHARED_LIBS)
       PUBLIC ${CMAKE_BINARY_DIR}/bin
       $<$<BOOL:${LAPACKE_FOUND}>:${LAPACKE_INCLUDE}>
   )
-  target_link_libraries(lapack_source_rt PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(lapack_source_rt PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
 add_library(lapack_source_ct OBJECT ${LAPACK_SOURCES})
@@ -99,4 +99,4 @@ target_include_directories(lapack_source_ct
     PUBLIC ${CMAKE_BINARY_DIR}/bin
     $<$<BOOL:${LAPACKE_FOUND}>:${LAPACKE_INCLUDE}>
 )
-target_link_libraries(lapack_source_ct PUBLIC ONEMKL::SYCL::SYCL)
+target_link_libraries(lapack_source_ct PUBLIC ONEMATH::SYCL::SYCL)
diff --git a/tests/unit_tests/lapack/source/gebrd.cpp b/tests/unit_tests/lapack/source/gebrd.cpp
index 66eb0b231..2bee82853 100644
--- a/tests/unit_tests/lapack/source/gebrd.cpp
+++ b/tests/unit_tests/lapack/source/gebrd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -49,7 +49,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
     int64_t min_mn = std::min<int64_t>(m, n);
 
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<fp_real> d(min_mn);
@@ -70,11 +70,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::gebrd_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::gebrd_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::gebrd_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -82,11 +82,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::gebrd(queue, m, n, A_dev, lda, d_dev, e_dev, tauq_dev, taup_dev,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::gebrd(queue, m, n, A_dev, lda, d_dev, e_dev, tauq_dev, taup_dev,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::gebrd, m, n, A_dev, lda, d_dev, e_dev,
-                                  tauq_dev, taup_dev, scratchpad_dev, scratchpad_size);
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gebrd, m, n, A_dev, lda, d_dev,
+                                  e_dev, tauq_dev, taup_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
 
@@ -122,7 +122,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
     int64_t min_mn = std::min<int64_t>(m, n);
 
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     auto A = A_initial;
     std::vector<fp_real> d(min_mn);
@@ -143,11 +143,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::gebrd_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::gebrd_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::gebrd_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -157,12 +157,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::gebrd(
+        sycl::event func_event = oneapi::math::lapack::gebrd(
             queue, m, n, A_dev, lda, d_dev, e_dev, tauq_dev, taup_dev, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::gebrd, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gebrd, m, n, A_dev, lda,
                                   d_dev, e_dev, tauq_dev, taup_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/geqrf.cpp b/tests/unit_tests/lapack/source/geqrf.cpp
index 27577e972..231be4b5d 100644
--- a/tests/unit_tests/lapack/source/geqrf.cpp
+++ b/tests/unit_tests/lapack/source/geqrf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -49,7 +49,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> tau(std::min(m, n));
@@ -62,11 +62,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::geqrf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::geqrf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -74,10 +74,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::geqrf, m, n, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf, m, n, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -105,7 +105,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> tau(std::min(m, n));
@@ -119,11 +119,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::geqrf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::geqrf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -134,11 +134,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::geqrf, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf, m, n, A_dev, lda,
                                   tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/geqrf_batch_group.cpp b/tests/unit_tests/lapack/source/geqrf_batch_group.cpp
index 416466028..c94ddd000 100644
--- a/tests/unit_tests/lapack/source/geqrf_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/geqrf_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -68,7 +68,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             tau_list.emplace_back(std::min(m, n));
@@ -95,12 +95,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -120,11 +120,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::geqrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs,
-                                         lda_vec.data(), tau_dev_ptrs, group_count,
-                                         group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::geqrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs,
+                                          lda_vec.data(), tau_dev_ptrs, group_count,
+                                          group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::geqrf_batch, m_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf_batch, m_vec.data(),
                                   n_vec.data(), A_dev_ptrs, lda_vec.data(), tau_dev_ptrs,
                                   group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size);
@@ -205,7 +205,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             tau_list.emplace_back(std::min(m, n));
@@ -233,12 +233,12 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -260,13 +260,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::geqrf_batch(
+        sycl::event func_event = oneapi::math::lapack::geqrf_batch(
             queue, m_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), tau_dev_ptrs,
             group_count, group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::geqrf_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf_batch,
                                   m_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   tau_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp b/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp
index 16ceef63a..a3cdb55e3 100644
--- a/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -50,7 +50,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
 
@@ -61,12 +61,12 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>(
             queue, m, n, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>, m, n,
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>, m, n,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -75,10 +75,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::geqrf_batch(queue, m, n, A_dev, lda, stride_a, tau_dev, stride_tau,
-                                         batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::geqrf_batch(queue, m, n, A_dev, lda, stride_a, tau_dev, stride_tau,
+                                          batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::geqrf_batch, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf_batch, m, n, A_dev, lda,
                                   stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -122,7 +122,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
 
@@ -134,12 +134,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>(
             queue, m, n, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size<fp>, m, n,
+            queue, scratchpad_size = oneapi::math::lapack::geqrf_batch_scratchpad_size<fp>, m, n,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -150,14 +150,15 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::geqrf_batch(
+        sycl::event func_event = oneapi::math::lapack::geqrf_batch(
             queue, m, n, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::geqrf_batch, m, n, A_dev,
-                                  lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
-                                  scratchpad_size, std::vector<sycl::event>{ in_event });
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf_batch, m, n,
+                                  A_dev, lda, stride_a, tau_dev, stride_tau, batch_size,
+                                  scratchpad_dev, scratchpad_size,
+                                  std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
 
diff --git a/tests/unit_tests/lapack/source/gerqf.cpp b/tests/unit_tests/lapack/source/gerqf.cpp
index dac6d79aa..bb6693598 100644
--- a/tests/unit_tests/lapack/source/gerqf.cpp
+++ b/tests/unit_tests/lapack/source/gerqf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -49,7 +49,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> tau(std::min(m, n));
@@ -62,11 +62,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::gerqf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::gerqf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::gerqf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -74,10 +74,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::gerqf, m, n, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gerqf, m, n, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -105,7 +105,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> tau(std::min(m, n));
@@ -119,11 +119,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::gerqf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::gerqf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::gerqf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -134,11 +134,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::gerqf, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gerqf, m, n, A_dev, lda,
                                   tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/gesvd.cpp b/tests/unit_tests/lapack/source/gesvd.cpp
index 1e143315b..afca7850b 100644
--- a/tests/unit_tests/lapack/source/gesvd.cpp
+++ b/tests/unit_tests/lapack/source/gesvd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -41,7 +41,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+bool accuracy(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
               int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -49,10 +49,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
     /* Initialize */
     int64_t min_mn = std::min(m, n);
     int64_t ucols = min_mn;
-    if (jobu == oneapi::mkl::jobsvd::vectors)
+    if (jobu == oneapi::math::jobsvd::vectors)
         ucols = m;
     int64_t vtrows = min_mn;
-    if (jobvt == oneapi::mkl::jobsvd::vectors)
+    if (jobvt == oneapi::math::jobsvd::vectors)
         vtrows = n;
 
     std::vector<fp> A(lda * n);
@@ -60,7 +60,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
     std::vector<fp> Vt(ldvt * n);
     std::vector<fp_real> s(min_mn);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -71,12 +71,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
         auto Vt_dev = device_alloc<data_T>(queue, Vt.size());
         auto s_dev = device_alloc<data_T, fp_real>(queue, s.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::gesvd_scratchpad_size<fp>(
             queue, jobu, jobvt, m, n, lda, ldu, ldvt);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::gesvd_scratchpad_size<fp>,
                                   jobu, jobvt, m, n, lda, ldu, ldvt);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -85,10 +85,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev,
-                                   ldvt, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::gesvd(queue, jobu, jobvt, m, n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev,
+                                    ldvt, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::gesvd, jobu, jobvt, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gesvd, jobu, jobvt, m, n, A_dev, lda,
                                   s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -107,7 +107,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
     }
     bool result = true;
 
-    if (jobu == oneapi::mkl::jobsvd::vectors && jobvt == oneapi::mkl::jobsvd::vectors) {
+    if (jobu == oneapi::math::jobsvd::vectors && jobvt == oneapi::math::jobsvd::vectors) {
         /* |A - U S V'| < |A| O(eps) */
         std::vector<fp> US(m * n);
         int64_t ldus = m;
@@ -116,39 +116,40 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jo
                 US[row + col * ldus] = U[row + col * ldu] * s[col];
         std::vector<fp> USV(m * n);
         int64_t ldusv = m;
-        reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, m, n, n,
-                        1.0, US.data(), ldus, Vt.data(), ldvt, 0.0, USV.data(), ldusv);
+        reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, m, n,
+                        n, 1.0, US.data(), ldus, Vt.data(), ldvt, 0.0, USV.data(), ldusv);
         if (!rel_mat_err_check(m, n, A_initial, lda, USV, ldusv)) {
             test_log::lout << "Factorization check failed" << std::endl;
             result = false;
         }
     }
 
-    if (jobu == oneapi::mkl::jobsvd::vectorsina)
+    if (jobu == oneapi::math::jobsvd::vectorsina)
         reference::lacpy('A', m, ucols, A.data(), lda, U.data(), ldu);
-    if (jobvt == oneapi::mkl::jobsvd::vectorsina)
+    if (jobvt == oneapi::math::jobsvd::vectorsina)
         reference::lacpy('A', vtrows, n, A.data(), lda, Vt.data(), ldvt);
 
-    if (jobu == oneapi::mkl::jobsvd::vectors || jobu == oneapi::mkl::jobsvd::somevec ||
-        jobu == oneapi::mkl::jobsvd::vectorsina) {
+    if (jobu == oneapi::math::jobsvd::vectors || jobu == oneapi::math::jobsvd::somevec ||
+        jobu == oneapi::math::jobsvd::vectorsina) {
         /* |I - U' U| < n O(eps) */
         std::vector<fp> UU(ucols * ucols);
         int64_t lduu = ucols;
-        reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, ucols,
-                        ucols, m, 1.0, U.data(), ldu, U.data(), ldu, 0.0, UU.data(), lduu);
+        reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                        ucols, ucols, m, 1.0, U.data(), ldu, U.data(), ldu, 0.0, UU.data(), lduu);
         if (!rel_id_err_check(ucols, UU, lduu)) {
             test_log::lout << "U Orthogonality check failed" << std::endl;
             result = false;
         }
     }
 
-    if (jobvt == oneapi::mkl::jobsvd::vectors || jobvt == oneapi::mkl::jobsvd::somevec ||
-        jobvt == oneapi::mkl::jobsvd::vectorsina) {
+    if (jobvt == oneapi::math::jobsvd::vectors || jobvt == oneapi::math::jobsvd::somevec ||
+        jobvt == oneapi::math::jobsvd::vectorsina) {
         /* |I - V' V| < n O(eps) */
         std::vector<fp> VV(vtrows * vtrows);
         int64_t ldvv = vtrows;
-        reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, vtrows,
-                        vtrows, n, 1.0, Vt.data(), ldvt, Vt.data(), ldvt, 0.0, VV.data(), ldvv);
+        reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans,
+                        vtrows, vtrows, n, 1.0, Vt.data(), ldvt, Vt.data(), ldvt, 0.0, VV.data(),
+                        ldvv);
         if (!rel_id_err_check(vtrows, VV, ldvv)) {
             test_log::lout << "V Orthogonality check failed" << std::endl;
             result = false;
@@ -162,7 +163,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt,
+bool usm_dependency(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt,
                     int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -170,10 +171,10 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::m
     /* Initialize */
     int64_t min_mn = std::min(m, n);
     int64_t ucols = min_mn;
-    if (jobu == oneapi::mkl::jobsvd::vectors)
+    if (jobu == oneapi::math::jobsvd::vectors)
         ucols = m;
     int64_t vtrows = min_mn;
-    if (jobvt == oneapi::mkl::jobsvd::vectors)
+    if (jobvt == oneapi::math::jobsvd::vectors)
         vtrows = n;
 
     std::vector<fp> A(lda * n);
@@ -181,7 +182,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::m
     std::vector<fp> Vt(ldvt * n);
     std::vector<fp_real> s(min_mn);
 
-    rand_matrix_diag_dom(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix_diag_dom(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -193,12 +194,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::m
         auto Vt_dev = device_alloc<data_T>(queue, Vt.size());
         auto s_dev = device_alloc<data_T, fp_real>(queue, s.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::gesvd_scratchpad_size<fp>(
             queue, jobu, jobvt, m, n, lda, ldu, ldvt);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::gesvd_scratchpad_size<fp>,
                                   jobu, jobvt, m, n, lda, ldu, ldvt);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -209,13 +210,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::jobsvd jobu, oneapi::m
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::gesvd(
+        sycl::event func_event = oneapi::math::lapack::gesvd(
             queue, jobu, jobvt, m, n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::gesvd, jobu, jobvt, m, n,
-                                  A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gesvd, jobu, jobvt, m,
+                                  n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/getrf.cpp b/tests/unit_tests/lapack/source/getrf.cpp
index 4537ef665..ca84f6680 100644
--- a/tests/unit_tests/lapack/source/getrf.cpp
+++ b/tests/unit_tests/lapack/source/getrf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -52,7 +52,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<int64_t> ipiv(std::min(m, n));
@@ -65,11 +65,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::getrf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::getrf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::getrf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -77,10 +77,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrf, m, n, A_dev, lda, ipiv_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf, m, n, A_dev, lda, ipiv_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -108,7 +108,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<int64_t> ipiv(std::min(m, n));
@@ -122,11 +122,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::getrf_scratchpad_size<fp>(queue, m, n, lda);
+            oneapi::math::lapack::getrf_scratchpad_size<fp>(queue, m, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<fp>, m, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::getrf_scratchpad_size<fp>, m, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -137,11 +137,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getrf, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrf, m, n, A_dev, lda,
                                   ipiv_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/getrf_batch_group.cpp b/tests/unit_tests/lapack/source/getrf_batch_group.cpp
index 12e651746..329f3d767 100644
--- a/tests/unit_tests/lapack/source/getrf_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/getrf_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -68,7 +68,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             ipiv_list.emplace_back(std::min(m, n));
@@ -99,12 +99,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -124,11 +124,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs,
-                                         lda_vec.data(), ipiv_dev_ptrs, group_count,
-                                         group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs,
+                                          lda_vec.data(), ipiv_dev_ptrs, group_count,
+                                          group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrf_batch, m_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf_batch, m_vec.data(),
                                   n_vec.data(), A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs,
                                   group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size);
@@ -210,7 +210,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             ipiv_list.emplace_back(std::min(m, n));
@@ -242,12 +242,12 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -269,14 +269,14 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getrf_batch(
+        sycl::event func_event = oneapi::math::lapack::getrf_batch(
             queue, m_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs,
             group_count, group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, func_event = oneapi::mkl::lapack::getrf_batch, m_vec.data(), n_vec.data(),
+            queue, func_event = oneapi::math::lapack::getrf_batch, m_vec.data(), n_vec.data(),
             A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, group_count, group_sizes_vec.data(),
             scratchpad_dev, scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/getrf_batch_stride.cpp b/tests/unit_tests/lapack/source/getrf_batch_stride.cpp
index 3e4ef6589..0ba2d1714 100644
--- a/tests/unit_tests/lapack/source/getrf_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/getrf_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -48,7 +48,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
     /* Initialize */
     std::vector<fp> A_initial(stride_a * batch_size);
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
     int64_t stride_ipiv = std::min(m, n);
@@ -61,12 +61,12 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>(
             queue, m, n, lda, stride_a, stride_ipiv, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>, m, n,
+            queue, scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>, m, n,
             lda, stride_a, stride_ipiv, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -75,10 +75,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrf_batch(queue, m, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv,
-                                         batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getrf_batch(queue, m, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv,
+                                          batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrf_batch, m, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf_batch, m, n, A_dev, lda,
                                   stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -120,7 +120,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
     /* Initialize */
     std::vector<fp> A_initial(stride_a * batch_size);
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
     int64_t stride_ipiv = std::min(m, n);
@@ -134,12 +134,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>(
             queue, m, n, lda, stride_a, stride_ipiv, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<fp>, m, n,
+            queue, scratchpad_size = oneapi::math::lapack::getrf_batch_scratchpad_size<fp>, m, n,
             lda, stride_a, stride_ipiv, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -150,14 +150,15 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getrf_batch(
+        sycl::event func_event = oneapi::math::lapack::getrf_batch(
             queue, m, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getrf_batch, m, n, A_dev,
-                                  lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
-                                  scratchpad_size, std::vector<sycl::event>{ in_event });
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrf_batch, m, n,
+                                  A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size,
+                                  scratchpad_dev, scratchpad_size,
+                                  std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
 
diff --git a/tests/unit_tests/lapack/source/getri.cpp b/tests/unit_tests/lapack/source/getri.cpp
index a1aa2deda..26b7c7307 100644
--- a/tests/unit_tests/lapack/source/getri.cpp
+++ b/tests/unit_tests/lapack/source/getri.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -52,7 +52,7 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, uint64_t seed) {
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<int64_t> ipiv(n);
@@ -70,11 +70,11 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, uint64_t seed) {
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<fp>(queue, n, lda);
+        const auto scratchpad_size = oneapi::math::lapack::getri_scratchpad_size<fp>(queue, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<fp>, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::getri_scratchpad_size<fp>, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -83,9 +83,10 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getri, n, A_dev, lda, ipiv_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri, n, A_dev, lda, ipiv_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -112,7 +113,7 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, uint64_t se
 
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
     std::vector<fp> A = A_initial;
     std::vector<int64_t> ipiv(n);
@@ -132,11 +133,11 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, uint64_t se
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<fp>(queue, n, lda);
+        const auto scratchpad_size = oneapi::math::lapack::getri_scratchpad_size<fp>(queue, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<fp>, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::getri_scratchpad_size<fp>, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -148,11 +149,11 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, uint64_t se
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getri, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getri, n, A_dev, lda,
                                   ipiv_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/getri_batch_group.cpp b/tests/unit_tests/lapack/source/getri_batch_group.cpp
index 244acfcc8..d13b40442 100644
--- a/tests/unit_tests/lapack/source/getri_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/getri_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -66,7 +66,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             auto& A = A_list.back();
@@ -106,12 +106,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>(
             queue, n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>,
             n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -134,12 +134,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getri_batch(queue, n_vec.data(), A_dev_ptrs, lda_vec.data(),
-                                         ipiv_dev_ptrs, group_count, group_sizes_vec.data(),
-                                         scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getri_batch(queue, n_vec.data(), A_dev_ptrs, lda_vec.data(),
+                                          ipiv_dev_ptrs, group_count, group_sizes_vec.data(),
+                                          scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getri_batch, n_vec.data(), A_dev_ptrs,
-                                  lda_vec.data(), ipiv_dev_ptrs, group_count,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri_batch, n_vec.data(),
+                                  A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, group_count,
                                   group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -213,7 +213,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             auto& A = A_list.back();
@@ -254,12 +254,12 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>(
             queue, n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>,
             n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -284,13 +284,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getri_batch(
+        sycl::event func_event = oneapi::math::lapack::getri_batch(
             queue, n_vec.data(), A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, group_count,
             group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getri_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getri_batch,
                                   n_vec.data(), A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs,
                                   group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/getri_batch_stride.cpp b/tests/unit_tests/lapack/source/getri_batch_stride.cpp
index 5a71d2d7e..858847f5d 100644
--- a/tests/unit_tests/lapack/source/getri_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/getri_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -47,7 +47,7 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, int64_t stride_a,
     std::vector<fp> A_initial(stride_a * batch_size);
     std::vector<int64_t> ipiv(stride_ipiv * batch_size);
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
 
@@ -68,12 +68,12 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, int64_t stride_a,
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>(
             queue, n, lda, stride_a, stride_ipiv, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>, n, lda,
+            queue, scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>, n, lda,
             stride_a, stride_ipiv, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -83,10 +83,10 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, int64_t stride_a,
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getri_batch(queue, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv,
-                                         batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getri_batch(queue, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv,
+                                          batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getri_batch, n, A_dev, lda, stride_a,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri_batch, n, A_dev, lda, stride_a,
                                   ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -126,7 +126,7 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, int64_t str
     std::vector<fp> A_initial(stride_a * batch_size);
     std::vector<int64_t> ipiv(stride_ipiv * batch_size);
     for (int64_t i = 0; i < batch_size; i++)
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
 
     std::vector<fp> A = A_initial;
 
@@ -148,12 +148,12 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, int64_t str
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>(
             queue, n, lda, stride_a, stride_ipiv, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size<fp>, n, lda,
+            queue, scratchpad_size = oneapi::math::lapack::getri_batch_scratchpad_size<fp>, n, lda,
             stride_a, stride_ipiv, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -165,12 +165,12 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, int64_t str
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getri_batch(
+        sycl::event func_event = oneapi::math::lapack::getri_batch(
             queue, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getri_batch, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getri_batch, n, A_dev,
                                   lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/getrs.cpp b/tests/unit_tests/lapack/source/getrs.cpp
index bfc271758..f43abb427 100644
--- a/tests/unit_tests/lapack/source/getrs.cpp
+++ b/tests/unit_tests/lapack/source/getrs.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n, int64_t nrhs,
+bool accuracy(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, int64_t nrhs,
               int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -48,8 +48,8 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
     std::vector<fp> B_initial(ldb * nrhs);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> B = B_initial;
@@ -70,11 +70,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::getrs_scratchpad_size<fp>(queue, trans, n, nrhs, lda, ldb);
+            oneapi::math::lapack::getrs_scratchpad_size<fp>(queue, trans, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::getrs_scratchpad_size<fp>,
                                   trans, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -85,10 +85,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, A_dev, lda, ipiv_dev, B_dev, ldb,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getrs(queue, trans, n, nrhs, A_dev, lda, ipiv_dev, B_dev, ldb,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrs, trans, n, nrhs, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs, trans, n, nrhs, A_dev, lda,
                                   ipiv_dev, B_dev, ldb, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -110,7 +110,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n, int64_t nrhs,
+bool usm_dependency(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, int64_t nrhs,
                     int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -118,8 +118,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
     /* Initialize */
     std::vector<fp> A_initial(lda * n);
     std::vector<fp> B_initial(ldb * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> B = B_initial;
@@ -142,11 +142,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::getrs_scratchpad_size<fp>(queue, trans, n, nrhs, lda, ldb);
+            oneapi::math::lapack::getrs_scratchpad_size<fp>(queue, trans, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::getrs_scratchpad_size<fp>,
                                   trans, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -159,12 +159,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getrs(
+        sycl::event func_event = oneapi::math::lapack::getrs(
             queue, trans, n, nrhs, A_dev, lda, ipiv_dev, B_dev, ldb, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getrs, trans, n, nrhs,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrs, trans, n, nrhs,
                                   A_dev, lda, ipiv_dev, B_dev, ldb, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/getrs_batch_group.cpp b/tests/unit_tests/lapack/source/getrs_batch_group.cpp
index 2027663e4..64c88bcd6 100644
--- a/tests/unit_tests/lapack/source/getrs_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/getrs_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -46,8 +46,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::transpose> trans_vec = { oneapi::mkl::transpose::nontrans,
-                                                      oneapi::mkl::transpose::trans };
+    std::vector<oneapi::math::transpose> trans_vec = { oneapi::math::transpose::nontrans,
+                                                       oneapi::math::transpose::trans };
     std::vector<int64_t> n_vec = { 4, 5 };
     std::vector<int64_t> nrhs_vec = { 9, 6 };
     std::vector<int64_t> lda_vec = { 6, 6 };
@@ -75,14 +75,14 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             auto& A = A_list.back();
 
             B_initial_list.emplace_back(ldb * nrhs);
             auto& B_initial = B_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, lda);
 
             B_list.emplace_back(B_initial);
             auto& B = B_list.back();
@@ -127,13 +127,13 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>(
             queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>,
             trans_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #endif
@@ -162,12 +162,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrs_batch(queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(),
-                                         A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, B_dev_ptrs,
-                                         ldb_vec.data(), group_count, group_sizes_vec.data(),
-                                         scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getrs_batch(queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(),
+                                          A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, B_dev_ptrs,
+                                          ldb_vec.data(), group_count, group_sizes_vec.data(),
+                                          scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrs_batch, trans_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs_batch, trans_vec.data(),
                                   n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   ipiv_dev_ptrs, B_dev_ptrs, ldb_vec.data(), group_count,
                                   group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
@@ -232,7 +232,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::transpose> trans_vec = { oneapi::mkl::transpose::nontrans };
+    std::vector<oneapi::math::transpose> trans_vec = { oneapi::math::transpose::nontrans };
     std::vector<int64_t> n_vec = { 1 };
     std::vector<int64_t> nrhs_vec = { 1 };
     std::vector<int64_t> lda_vec = { 1 };
@@ -260,14 +260,14 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_initial_list.emplace_back(lda * n);
             auto& A_initial = A_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda);
 
             A_list.emplace_back(A_initial);
             auto& A = A_list.back();
 
             B_initial_list.emplace_back(ldb * nrhs);
             auto& B_initial = B_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, lda);
 
             B_list.emplace_back(B_initial);
             auto& B = B_list.back();
@@ -313,13 +313,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>(
             queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>,
             trans_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #endif
@@ -350,13 +350,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getrs_batch(
+        sycl::event func_event = oneapi::math::lapack::getrs_batch(
             queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(),
             ipiv_dev_ptrs, B_dev_ptrs, ldb_vec.data(), group_count, group_sizes_vec.data(),
             scratchpad_dev, scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getrs_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrs_batch,
                                   trans_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs,
                                   lda_vec.data(), ipiv_dev_ptrs, B_dev_ptrs, ldb_vec.data(),
                                   group_count, group_sizes_vec.data(), scratchpad_dev,
diff --git a/tests/unit_tests/lapack/source/getrs_batch_stride.cpp b/tests/unit_tests/lapack/source/getrs_batch_stride.cpp
index 1faf3d3e6..600067883 100644
--- a/tests/unit_tests/lapack/source/getrs_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/getrs_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n, int64_t nrhs,
+bool accuracy(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, int64_t nrhs,
               int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b,
               int64_t batch_size, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
@@ -49,8 +49,8 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
     std::vector<fp> B_initial(stride_b * batch_size);
     std::vector<int64_t> ipiv(stride_ipiv * batch_size);
     for (int64_t i = 0; i < batch_size; i++) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
     }
 
     std::vector<fp> A = A_initial;
@@ -74,13 +74,13 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
         auto B_dev = device_alloc<data_T>(queue, B.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>(
             queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>, trans, n,
-            nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+            queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>, trans,
+            n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -90,11 +90,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n,
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, A_dev, lda, stride_a, ipiv_dev,
-                                         stride_ipiv, B_dev, ldb, stride_b, batch_size,
-                                         scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::getrs_batch(queue, trans, n, nrhs, A_dev, lda, stride_a, ipiv_dev,
+                                          stride_ipiv, B_dev, ldb, stride_b, batch_size,
+                                          scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::getrs_batch, trans, n, nrhs, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs_batch, trans, n, nrhs, A_dev,
                                   lda, stride_a, ipiv_dev, stride_ipiv, B_dev, ldb, stride_b,
                                   batch_size, scratchpad_dev, scratchpad_size);
 #endif
@@ -128,7 +128,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64_t n, int64_t nrhs,
+bool usm_dependency(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, int64_t nrhs,
                     int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb,
                     int64_t stride_b, int64_t batch_size, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
@@ -137,8 +137,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
     std::vector<fp> B_initial(stride_b * batch_size);
     std::vector<int64_t> ipiv(stride_ipiv * batch_size);
     for (auto i = 0; i < batch_size; ++i) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, nrhs, n, B_initial, ldb, i * stride_b);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A_initial, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, nrhs, n, B_initial, ldb, i * stride_b);
     }
 
     std::vector<fp> A = A_initial;
@@ -163,13 +163,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
         auto B_dev = device_alloc<data_T>(queue, B.size());
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>(
             queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<fp>, trans, n,
-            nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+            queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size<fp>, trans,
+            n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -181,13 +181,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::transpose trans, int64
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::getrs_batch(
+        sycl::event func_event = oneapi::math::lapack::getrs_batch(
             queue, trans, n, nrhs, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, B_dev, ldb,
             stride_b, batch_size, scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::getrs_batch, trans, n,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrs_batch, trans, n,
                                   nrhs, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, B_dev, ldb,
                                   stride_b, batch_size, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/heevd.cpp b/tests/unit_tests/lapack/source/heevd.cpp
index 62c23c3ad..8eecbba2b 100644
--- a/tests/unit_tests/lapack/source/heevd.cpp
+++ b/tests/unit_tests/lapack/source/heevd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int64_t n,
+bool accuracy(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::uplo uplo, int64_t n,
               int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -59,11 +59,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::heevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
+            oneapi::math::lapack::heevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::heevd_scratchpad_size<fp>,
                                   jobz, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -72,10 +72,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::heevd, jobz, uplo, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::heevd, jobz, uplo, n, A_dev, lda,
                                   w_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -97,7 +97,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+bool usm_dependency(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::uplo uplo,
                     int64_t n, int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -117,11 +117,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl:
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::heevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
+            oneapi::math::lapack::heevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::heevd_scratchpad_size<fp>,
                                   jobz, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -133,11 +133,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl:
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::heevd, jobz, uplo, n,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::heevd, jobz, uplo, n,
                                   A_dev, lda, w_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/hegvd.cpp b/tests/unit_tests/lapack/source/hegvd.cpp
index 9a109e6b8..55422330b 100644
--- a/tests/unit_tests/lapack/source/hegvd.cpp
+++ b/tests/unit_tests/lapack/source/hegvd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -42,8 +42,8 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-              int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
+bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz,
+              oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -65,11 +65,11 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hegvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
+            oneapi::math::lapack::hegvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::hegvd_scratchpad_size<fp>,
                                   itype, jobz, uplo, n, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -79,10 +79,10 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::hegvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::hegvd, itype, jobz, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hegvd, itype, jobz, uplo, n, A_dev,
                                   lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -107,24 +107,24 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
 
     /* |D_ref - D| < |D_ref| O(eps) */
     std::vector<fp_real> D_ref(n);
-    reference::hegvd(itype, oneapi::mkl::job::novec, uplo, n, std::vector<fp>(A_initial).data(),
+    reference::hegvd(itype, oneapi::math::job::novec, uplo, n, std::vector<fp>(A_initial).data(),
                      lda, std::vector<fp>(B_initial).data(), ldb, D_ref.data());
     if (!rel_vec_err_check(n, D_ref, D, 10.0)) {
         test_log::lout << "Eigenvalue check failed" << std::endl;
         result = false;
     }
 
-    if (oneapi::mkl::job::vec == jobz) {
+    if (oneapi::math::job::vec == jobz) {
         if (itype == 1) {
             /* |A Z - B Z D| < |A Z| O(eps) */
             std::vector<fp> AZ(n * n);
             int64_t ldaz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, Z.data(), ldz, 0.0, AZ.data(), ldaz);
 
             std::vector<fp> BZ(n * n);
             int64_t ldbz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, B_initial.data(), ldb, Z.data(), ldz, 0.0, BZ.data(), ldbz);
 
             std::vector<fp> BZD(n * n);
@@ -141,8 +141,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B Z| < n O(eps) */
             std::vector<fp> ZBZ(n * n);
             int64_t ldzbz = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
             if (!rel_id_err_check(n, ZBZ, ldzbz)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -152,12 +152,12 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |A B Z - Z D| < |A B Z| O(eps) */
             std::vector<fp> BZ(n * n);
             int64_t ldbz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, B_initial.data(), ldb, Z.data(), ldz, 0.0, BZ.data(), ldbz);
 
             std::vector<fp> ABZ(n * n);
             int64_t ldabz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, BZ.data(), ldbz, 0.0, ABZ.data(),
                             ldabz);
 
@@ -175,8 +175,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B Z| < n O(eps) */
             std::vector<fp> ZBZ(n * n);
             int64_t ldzbz = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
             if (!rel_id_err_check(n, ZBZ, ldzbz)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -187,7 +187,7 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* C = B^-1 Z */
             std::vector<fp> AZ(n * n);
             int64_t ldaz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, Z.data(), ldz, 0.0, AZ.data(), ldaz);
 
             std::vector<fp> C(n * n);
@@ -213,8 +213,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B^-1 Z| = |I - Z' C| < n O(eps) */
             std::vector<fp> ZhC(n * n);
             int64_t ldzhc = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc);
             if (!rel_id_err_check(n, ZhC, ldzhc)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -229,8 +229,8 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz,
-                    oneapi::mkl::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
+bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::math::job jobz,
+                    oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -253,11 +253,11 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job job
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hegvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
+            oneapi::math::lapack::hegvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::hegvd_scratchpad_size<fp>,
                                   itype, jobz, uplo, n, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -269,14 +269,14 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job job
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::hegvd(
+        sycl::event func_event = oneapi::math::lapack::hegvd(
             queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::hegvd, itype, jobz, uplo,
-                                  n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size,
-                                  std::vector<sycl::event>{ in_event });
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hegvd, itype, jobz,
+                                  uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev,
+                                  scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
 
diff --git a/tests/unit_tests/lapack/source/hetrd.cpp b/tests/unit_tests/lapack/source/hetrd.cpp
index 13172d64f..a5b078eeb 100644
--- a/tests/unit_tests/lapack/source/hetrd.cpp
+++ b/tests/unit_tests/lapack/source/hetrd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -50,7 +50,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp_real> d(n);
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -63,11 +63,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hetrd_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::hetrd_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::hetrd_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -78,10 +78,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::hetrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::hetrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::hetrd, uplo, n, A_dev, lda, d_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hetrd, uplo, n, A_dev, lda, d_dev,
                                   e_dev, tau_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -114,9 +114,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
 
     std::vector<fp> QTQ{ T };
     int64_t ldqtq = n;
-    reference::or_un_mtr(oneapi::mkl::side::left, uplo, oneapi::mkl::transpose::nontrans, n, n,
+    reference::or_un_mtr(oneapi::math::side::left, uplo, oneapi::math::transpose::nontrans, n, n,
                          A.data(), lda, tau.data(), QTQ.data(), ldqtq);
-    reference::or_un_mtr(oneapi::mkl::side::right, uplo, oneapi::mkl::transpose::conjtrans, n, n,
+    reference::or_un_mtr(oneapi::math::side::right, uplo, oneapi::math::transpose::conjtrans, n, n,
                          A.data(), lda, tau.data(), QTQ.data(), ldqtq);
 
     if (!rel_mat_err_check(n, n, QTQ, ldqtq, A_initial, lda)) {
@@ -127,7 +127,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     /* A[i, i] = d[i] */
     for (int64_t diag = 0; diag < n; diag++)
         d[diag] -= A[diag + diag * lda].real();
-    if (uplo == oneapi::mkl::uplo::upper)
+    if (uplo == oneapi::math::uplo::upper)
         for (int64_t diag = 0; diag < n - 1; diag++)
             e[diag] -= A[diag + (diag + 1) * lda].real();
     else
@@ -152,7 +152,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -162,7 +162,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp_real> d(n);
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -176,11 +176,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hetrd_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::hetrd_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::hetrd_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -193,12 +193,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::hetrd(
+        sycl::event func_event = oneapi::math::lapack::hetrd(
             queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::hetrd, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hetrd, uplo, n, A_dev,
                                   lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/hetrf.cpp b/tests/unit_tests/lapack/source/hetrf.cpp
index 73535a77f..72c767b74 100644
--- a/tests/unit_tests/lapack/source/hetrf.cpp
+++ b/tests/unit_tests/lapack/source/hetrf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -43,7 +43,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -63,11 +63,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hetrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::hetrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::hetrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -76,10 +76,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::hetrf, uplo, n, A_dev, lda, ipiv_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hetrf, uplo, n, A_dev, lda, ipiv_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -104,7 +104,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     for (int64_t d = 0; d < n; d++)
         U[d + d * ldu] = 1.0;
 
-    if (uplo == oneapi::mkl::uplo::upper) {
+    if (uplo == oneapi::math::uplo::upper) {
         int64_t k = n - 1;
         while (k >= 0) {
             reference::laset('A', n, n, 0.0, 1.0, Uk.data(), ldu);
@@ -117,9 +117,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu),
                                     ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 k -= 1;
@@ -135,9 +135,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k - 1 + 0 * ldu), ldu,
                                     Uk.data() + (piv + 0 * ldu), ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 D[k - 1 + (k - 1) * ldd] = A[k - 1 + (k - 1) * lda];
@@ -160,9 +160,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 0 * lda), ldu, Uk.data() + (piv + 0 * ldu),
                                     ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + (k)*ldd] = A[k + (k)*lda];
                 k += 1;
@@ -178,9 +178,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 1 + 0 * ldu), ldu,
                                     Uk.data() + (piv + 0 * ldu), ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 D[k + 1 + (k + 1) * ldd] = A[k + 1 + (k + 1) * lda];
@@ -194,12 +194,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     /* |A - UDU'| < |A| O(eps) */
     std::vector<fp> UD(n * n);
     int64_t ldud = n;
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, n, n,
                     1.0, U.data(), ldu, D.data(), ldd, 0.0, UD.data(), ldud);
 
     std::vector<fp> UDU(n * n);
     int64_t ldudu = n;
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::conjtrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, n, n, n,
                     1.0, UD.data(), ldud, U.data(), ldu, 0.0, UDU.data(), ldudu);
 
     if (!rel_mat_err_check(n, n, UDU, ldudu, A_initial, lda)) {
@@ -215,7 +215,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -236,11 +236,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::hetrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::hetrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::hetrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -252,11 +252,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::hetrf, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hetrf, uplo, n, A_dev,
                                   lda, ipiv_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/orgbr.cpp b/tests/unit_tests/lapack/source/orgbr.cpp
index 274cafce0..3f78a9653 100644
--- a/tests/unit_tests/lapack/source/orgbr.cpp
+++ b/tests/unit_tests/lapack/source/orgbr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -43,7 +43,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k,
+bool accuracy(const sycl::device& dev, oneapi::math::generate vect, int64_t m, int64_t n, int64_t k,
               int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -52,9 +52,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
     int64_t m_A = m;
     int64_t n_A = n;
 
-    if (vect == oneapi::mkl::generate::Q)
+    if (vect == oneapi::math::generate::Q)
         n_A = k;
-    else /* vect == oneapi::mkl::generate::P */
+    else /* vect == oneapi::math::generate::P */
         m_A = k;
 
     int64_t min_mn_A = std::min<int64_t>(m_A, n_A);
@@ -65,10 +65,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
     std::vector<fp> tauq(min_mn_A);
     std::vector<fp> taup(min_mn_A);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m_A, n_A, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m_A, n_A, A, lda);
     reference::gebrd(m_A, n_A, A.data(), lda, d.data(), e.data(), tauq.data(), taup.data());
 
-    auto& tau = (vect == oneapi::mkl::generate::Q) ? tauq : taup;
+    auto& tau = (vect == oneapi::math::generate::Q) ? tauq : taup;
 
     /* Compute on device */
     {
@@ -79,11 +79,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
+            oneapi::math::lapack::orgbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::orgbr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::orgbr_scratchpad_size<fp>,
                                   vect, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -93,10 +93,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::orgbr, vect, m, n, k, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgbr, vect, m, n, k, A_dev, lda,
                                   tau_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -117,7 +117,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, int64_t n,
+bool usm_dependency(const sycl::device& dev, oneapi::math::generate vect, int64_t m, int64_t n,
                     int64_t k, int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -126,9 +126,9 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
     int64_t m_A = m;
     int64_t n_A = n;
 
-    if (vect == oneapi::mkl::generate::Q)
+    if (vect == oneapi::math::generate::Q)
         n_A = k;
-    else /* vect == oneapi::mkl::generate::P */
+    else /* vect == oneapi::math::generate::P */
         m_A = k;
 
     int64_t min_mn_A = std::min<int64_t>(m_A, n_A);
@@ -139,10 +139,10 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
     std::vector<fp> tauq(min_mn_A);
     std::vector<fp> taup(min_mn_A);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m_A, n_A, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m_A, n_A, A, lda);
     reference::gebrd(m_A, n_A, A.data(), lda, d.data(), e.data(), tauq.data(), taup.data());
 
-    auto& tau = (vect == oneapi::mkl::generate::Q) ? tauq : taup;
+    auto& tau = (vect == oneapi::math::generate::Q) ? tauq : taup;
 
     /* Compute on device */
     bool result;
@@ -154,11 +154,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
+            oneapi::math::lapack::orgbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::orgbr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::orgbr_scratchpad_size<fp>,
                                   vect, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -171,11 +171,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::orgbr, vect, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgbr, vect, m, n, k,
                                   A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/orgqr.cpp b/tests/unit_tests/lapack/source/orgqr.cpp
index 9d62daf5f..8f1da86b8 100644
--- a/tests/unit_tests/lapack/source/orgqr.cpp
+++ b/tests/unit_tests/lapack/source/orgqr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -54,7 +54,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
     std::vector<fp> A(lda * n);
     std::vector<fp> tau(k);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     auto info = reference::geqrf(m, k, A.data(), lda, tau.data());
     if (0 != info) {
         test_log::lout << "reference geqrf failed with info: " << info << std::endl;
@@ -68,11 +68,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgqr_scratchpad_size<fp>(queue, m, n, k, lda);
+            oneapi::math::lapack::orgqr_scratchpad_size<fp>(queue, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size<fp>, m, n, k, lda);
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_scratchpad_size<fp>, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -81,10 +81,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::orgqr, m, n, k, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr, m, n, k, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -112,7 +112,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
 
     /* Initialize */
     std::vector<fp> A(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(m, k, A.data(), lda, tau.data());
@@ -129,11 +129,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgqr_scratchpad_size<fp>(queue, m, n, k, lda);
+            oneapi::math::lapack::orgqr_scratchpad_size<fp>(queue, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size<fp>, m, n, k, lda);
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_scratchpad_size<fp>, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -145,11 +145,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::orgqr, m, n, k, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgqr, m, n, k, A_dev,
                                   lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/orgqr_batch_group.cpp b/tests/unit_tests/lapack/source/orgqr_batch_group.cpp
index 3af796e7d..172471725 100644
--- a/tests/unit_tests/lapack/source/orgqr_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/orgqr_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -69,7 +69,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_list.emplace_back(lda * n);
             auto& A = A_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
 
             tau_list.emplace_back(k);
             auto& tau = tau_list.back();
@@ -101,13 +101,13 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #endif
@@ -130,11 +130,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::orgqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(),
-                                         A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count,
-                                         group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::orgqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(),
+                                          A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count,
+                                          group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::orgqr_batch, m_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr_batch, m_vec.data(),
                                   n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   tau_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size);
@@ -213,7 +213,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_list.emplace_back(lda * n);
             auto& A = A_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
 
             tau_list.emplace_back(k);
             auto& tau = tau_list.back();
@@ -246,13 +246,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #endif
@@ -277,13 +277,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::orgqr_batch(
+        sycl::event func_event = oneapi::math::lapack::orgqr_batch(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(),
             tau_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::orgqr_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgqr_batch,
                                   m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs,
                                   lda_vec.data(), tau_dev_ptrs, group_count, group_sizes_vec.data(),
                                   scratchpad_dev, scratchpad_size,
diff --git a/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp b/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp
index 1cf3471c5..8ac2b2dff 100644
--- a/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -50,7 +50,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda, i * stride_a);
         auto info =
             reference::geqrf(m, k, A.data() + i * stride_a, lda, tau.data() + i * stride_tau);
         if (0 != info) {
@@ -67,12 +67,12 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>(
             queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>, m, n, k,
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>, m, n, k,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -82,10 +82,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau,
-                                         batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::orgqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau,
+                                          batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::orgqr_batch, m, n, k, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr_batch, m, n, k, A_dev, lda,
                                   stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -127,7 +127,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda, i * stride_a);
         auto info =
             reference::geqrf(m, k, A.data() + i * stride_a, lda, tau.data() + i * stride_tau);
         if (0 != info) {
@@ -145,12 +145,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>(
             queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size<fp>, m, n, k,
+            queue, scratchpad_size = oneapi::math::lapack::orgqr_batch_scratchpad_size<fp>, m, n, k,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -162,12 +162,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::orgqr_batch(
+        sycl::event func_event = oneapi::math::lapack::orgqr_batch(
             queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::orgqr_batch, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgqr_batch, m, n, k,
                                   A_dev, lda, stride_a, tau_dev, stride_tau, batch_size,
                                   scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/orgtr.cpp b/tests/unit_tests/lapack/source/orgtr.cpp
index 5a01745d5..154402156 100644
--- a/tests/unit_tests/lapack/source/orgtr.cpp
+++ b/tests/unit_tests/lapack/source/orgtr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -51,7 +51,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     auto info = reference::sytrd(uplo, n, A.data(), lda, d.data(), e.data(), tau.data());
     if (0 != info) {
         test_log::lout << "reference sytrd failed with info = " << info << std::endl;
@@ -66,11 +66,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgtr_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::orgtr_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::orgtr_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -79,10 +79,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::orgtr, uplo, n, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgtr, uplo, n, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -103,7 +103,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -114,7 +114,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     auto info = reference::sytrd(uplo, n, A.data(), lda, d.data(), e.data(), tau.data());
     if (0 != info) {
         test_log::lout << "reference sytrd failed with info = " << info << std::endl;
@@ -130,11 +130,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::orgtr_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::orgtr_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::orgtr_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -146,11 +146,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::orgtr, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgtr, uplo, n, A_dev,
                                   lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/ormqr.cpp b/tests/unit_tests/lapack/source/ormqr.cpp
index e2ed49b96..1c27a5cd7 100644
--- a/tests/unit_tests/lapack/source/ormqr.cpp
+++ b/tests/unit_tests/lapack/source/ormqr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -47,19 +47,19 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl::transpose trans,
+bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::math::transpose trans,
               int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(nq, k, A.data(), lda, tau.data());
@@ -75,12 +75,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormqr_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormqr_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -91,10 +91,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ormqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
-                                   ldc, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::ormqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
+                                    ldc, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ormqr, left_right, trans, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormqr, left_right, trans, m, n, k,
                                   A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -131,20 +131,20 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
-                    oneapi::mkl::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right,
+                    oneapi::math::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
                     int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(nq, k, A.data(), lda, tau.data());
@@ -162,12 +162,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormqr_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormqr_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -180,13 +180,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::ormqr(
+        sycl::event func_event = oneapi::math::lapack::ormqr(
             queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ormqr, left_right, trans,
-                                  m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormqr, left_right,
+                                  trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/ormrq.cpp b/tests/unit_tests/lapack/source/ormrq.cpp
index 4882e5bc7..f63dca6a8 100644
--- a/tests/unit_tests/lapack/source/ormrq.cpp
+++ b/tests/unit_tests/lapack/source/ormrq.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,18 +40,18 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl::transpose trans,
+bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::math::transpose trans,
               int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
     int64_t nq;
-    if (left_right == oneapi::mkl::side::left) {
+    if (left_right == oneapi::math::side::left) {
         if (k > m) {
             test_log::lout << "Bad test input, side == left and k > m (" << k << " > " << m << ")"
                            << std::endl;
@@ -69,7 +69,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
     }
 
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::gerqf(nq, k, A.data(), lda, tau.data());
@@ -85,12 +85,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormrq_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormrq_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -101,10 +101,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ormrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
-                                   ldc, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::ormrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
+                                    ldc, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ormrq, left_right, trans, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormrq, left_right, trans, m, n, k,
                                   A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -141,20 +141,20 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
-                    oneapi::mkl::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right,
+                    oneapi::math::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
                     int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::gerqf(nq, k, A.data(), lda, tau.data());
@@ -171,12 +171,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormrq_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormrq_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -189,13 +189,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::ormrq(
+        sycl::event func_event = oneapi::math::lapack::ormrq(
             queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ormrq, left_right, trans,
-                                  m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormrq, left_right,
+                                  trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/ormtr.cpp b/tests/unit_tests/lapack/source/ormtr.cpp
index 4e8dd95b9..dde332b32 100644
--- a/tests/unit_tests/lapack/source/ormtr.cpp
+++ b/tests/unit_tests/lapack/source/ormtr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,17 +40,17 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64_t n, int64_t lda,
               int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
-    oneapi::mkl::side side = oneapi::mkl::side::right;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::side side = oneapi::math::side::right;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     std::vector<fp> A(n * lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
 
     std::vector<fp> tau(n);
     std::vector<fp_real> d(n);
@@ -62,7 +62,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
     }
 
     std::vector<fp> C(n * ldc);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C, ldc);
     std::vector<fp> C_initial = C;
 
     /* Compute on device */
@@ -73,12 +73,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormtr_scratchpad_size<fp>(
             queue, side, uplo, trans, m, n, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormtr_scratchpad_size<fp>,
                                   side, uplo, trans, m, n, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -89,11 +89,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::ormtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ormtr, side, uplo, trans, m, n, A_dev,
-                                  lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormtr, side, uplo, trans, m, n,
+                                  A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
 
@@ -127,17 +127,17 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_t n,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64_t n,
                     int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
-    oneapi::mkl::side side = oneapi::mkl::side::right;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::side side = oneapi::math::side::right;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     std::vector<fp> A(n * lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
 
     std::vector<fp> tau(n);
     std::vector<fp_real> d(n);
@@ -149,7 +149,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
     }
 
     std::vector<fp> C(n * ldc);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C, ldc);
     std::vector<fp> C_initial = C;
 
     /* Compute on device */
@@ -161,12 +161,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ormtr_scratchpad_size<fp>(
             queue, side, uplo, trans, m, n, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ormtr_scratchpad_size<fp>,
                                   side, uplo, trans, m, n, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -179,13 +179,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::ormtr(
+        sycl::event func_event = oneapi::math::lapack::ormtr(
             queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ormtr, side, uplo, trans,
-                                  m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormtr, side, uplo,
+                                  trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/potrf.cpp b/tests/unit_tests/lapack/source/potrf.cpp
index 7d2df8ea9..2cd37de30 100644
--- a/tests/unit_tests/lapack/source/potrf.cpp
+++ b/tests/unit_tests/lapack/source/potrf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -43,7 +43,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -61,11 +61,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::potrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::potrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -73,9 +73,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrf(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::potrf(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrf, uplo, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrf, uplo, n, A_dev, lda,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -95,7 +95,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -114,11 +114,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::potrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::potrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -129,11 +129,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::potrf(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size,
-                                       std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::potrf(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size,
+                                        std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrf, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrf, uplo, n, A_dev,
                                   lda, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/potrf_batch_group.cpp b/tests/unit_tests/lapack/source/potrf_batch_group.cpp
index 4a5b8dd58..90ceaa5bf 100644
--- a/tests/unit_tests/lapack/source/potrf_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/potrf_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -46,8 +46,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::uplo> uplo_vec = { oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::uplo::lower };
+    std::vector<oneapi::math::uplo> uplo_vec = { oneapi::math::uplo::upper,
+                                                 oneapi::math::uplo::lower };
     std::vector<int64_t> n_vec = { 4, 4 };
     std::vector<int64_t> lda_vec = { 5, 5 };
     std::vector<int64_t> group_sizes_vec = { 2, 2 };
@@ -90,13 +90,13 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>(
             queue, uplo_vec.data(), n_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>,
             uplo_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -113,11 +113,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrf_batch(queue, uplo_vec.data(), n_vec.data(), A_dev_ptrs,
-                                         lda_vec.data(), group_count, group_sizes_vec.data(),
-                                         scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::potrf_batch(queue, uplo_vec.data(), n_vec.data(), A_dev_ptrs,
+                                          lda_vec.data(), group_count, group_sizes_vec.data(),
+                                          scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrf_batch, uplo_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrf_batch, uplo_vec.data(),
                                   n_vec.data(), A_dev_ptrs, lda_vec.data(), group_count,
                                   group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #endif
@@ -169,7 +169,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::uplo> uplo_vec = { oneapi::mkl::uplo::upper };
+    std::vector<oneapi::math::uplo> uplo_vec = { oneapi::math::uplo::upper };
     std::vector<int64_t> n_vec = { 1 };
     std::vector<int64_t> lda_vec = { 1 };
     std::vector<int64_t> group_sizes_vec = { 1 };
@@ -213,13 +213,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>(
             queue, uplo_vec.data(), n_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>,
             uplo_vec.data(), n_vec.data(), lda_vec.data(), group_count, group_sizes_vec.data());
 #endif
         auto scratchpad_dev = device_alloc<fp>(queue, scratchpad_size);
@@ -238,13 +238,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::potrf_batch(
+        sycl::event func_event = oneapi::math::lapack::potrf_batch(
             queue, uplo_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), group_count,
             group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrf_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrf_batch,
                                   uplo_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/potrf_batch_stride.cpp b/tests/unit_tests/lapack/source/potrf_batch_stride.cpp
index fae4f0bcc..15a5d8296 100644
--- a/tests/unit_tests/lapack/source/potrf_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/potrf_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               int64_t stride_a, int64_t batch_size, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -58,12 +58,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
 
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>(
             queue, uplo, n, lda, stride_a, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>, uplo, n,
+            queue, scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>, uplo, n,
             lda, stride_a, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -72,10 +72,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrf_batch(queue, uplo, n, A_dev, lda, stride_a, batch_size,
-                                         scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::potrf_batch(queue, uplo, n, A_dev, lda, stride_a, batch_size,
+                                          scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrf_batch, uplo, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrf_batch, uplo, n, A_dev, lda,
                                   stride_a, batch_size, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -105,7 +105,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     int64_t stride_a, int64_t batch_size, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -124,12 +124,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
 
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>(
             queue, uplo, n, lda, stride_a, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size<fp>, uplo, n,
+            queue, scratchpad_size = oneapi::math::lapack::potrf_batch_scratchpad_size<fp>, uplo, n,
             lda, stride_a, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -140,12 +140,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::potrf_batch(
+        sycl::event func_event = oneapi::math::lapack::potrf_batch(
             queue, uplo, n, A_dev, lda, stride_a, batch_size, scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrf_batch, uplo, n,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrf_batch, uplo, n,
                                   A_dev, lda, stride_a, batch_size, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/potri.cpp b/tests/unit_tests/lapack/source/potri.cpp
index cd2f86449..a76958e4e 100644
--- a/tests/unit_tests/lapack/source/potri.cpp
+++ b/tests/unit_tests/lapack/source/potri.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -43,7 +43,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -65,11 +65,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potri_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::potri_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::potri_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -77,9 +77,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potri(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::potri(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potri, uplo, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potri, uplo, n, A_dev, lda,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -102,7 +102,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     int64_t ldr = n;
     for (int64_t diag = 0; diag < n; diag++)
         resid[diag + diag * ldr] = static_cast<fp>(1.0);
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, n, n,
                     1.0, A_initial.data(), lda, A.data(), lda, -1.0, resid.data(), ldr);
     auto norm_resid = reference::lange('1', n, n, resid.data(), ldr);
     auto rel_err = norm_resid / (norm_A * norm_Ainv * n * ulp);
@@ -125,7 +125,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -148,11 +148,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto A_dev = device_alloc<data_T>(queue, A.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potri_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::potri_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::potri_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -163,11 +163,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::potri(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size,
-                                       std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::potri(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size,
+                                        std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potri, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potri, uplo, n, A_dev,
                                   lda, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/potrs.cpp b/tests/unit_tests/lapack/source/potrs.cpp
index c534ec8ba..7c7a9b07b 100644
--- a/tests/unit_tests/lapack/source/potrs.cpp
+++ b/tests/unit_tests/lapack/source/potrs.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,8 +40,8 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs, int64_t lda,
-              int64_t ldb, uint64_t seed) {
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs,
+              int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -49,7 +49,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp> A_initial(lda * n);
     std::vector<fp> B_initial(ldb * nrhs);
     rand_pos_def_matrix(seed, uplo, n, A_initial, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> B = B_initial;
@@ -68,11 +68,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto B_dev = device_alloc<data_T>(queue, B.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potrs_scratchpad_size<fp>(queue, uplo, n, nrhs, lda, ldb);
+            oneapi::math::lapack::potrs_scratchpad_size<fp>(queue, uplo, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::potrs_scratchpad_size<fp>,
                                   uplo, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -82,10 +82,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::potrs(queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrs, uplo, n, nrhs, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs, uplo, n, nrhs, A_dev, lda,
                                   B_dev, ldb, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -106,7 +106,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs,
                     int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -115,7 +115,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp> A_initial(lda * n);
     std::vector<fp> B_initial(ldb * nrhs);
     rand_pos_def_matrix(seed, uplo, n, A_initial, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb);
 
     std::vector<fp> A = A_initial;
     std::vector<fp> B = B_initial;
@@ -135,11 +135,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto B_dev = device_alloc<data_T>(queue, B.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::potrs_scratchpad_size<fp>(queue, uplo, n, nrhs, lda, ldb);
+            oneapi::math::lapack::potrs_scratchpad_size<fp>(queue, uplo, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::potrs_scratchpad_size<fp>,
                                   uplo, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -151,12 +151,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event =
-            oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+        sycl::event func_event = oneapi::math::lapack::potrs(
+            queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size,
+            std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrs, uplo, n, nrhs,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrs, uplo, n, nrhs,
                                   A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/potrs_batch_group.cpp b/tests/unit_tests/lapack/source/potrs_batch_group.cpp
index 35c5ead0c..f469e69c1 100644
--- a/tests/unit_tests/lapack/source/potrs_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/potrs_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -46,8 +46,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::uplo> uplo_vec = { oneapi::mkl::uplo::upper,
-                                                oneapi::mkl::uplo::lower };
+    std::vector<oneapi::math::uplo> uplo_vec = { oneapi::math::uplo::upper,
+                                                 oneapi::math::uplo::lower };
     std::vector<int64_t> n_vec = { 4, 5 };
     std::vector<int64_t> nrhs_vec = { 9, 6 };
     std::vector<int64_t> lda_vec = { 6, 6 };
@@ -81,7 +81,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
 
             B_initial_list.emplace_back(ldb * nrhs);
             auto& B_initial = B_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, lda);
 
             B_list.emplace_back(B_initial);
             auto& B = B_list.back();
@@ -114,13 +114,13 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>(
             queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>,
             uplo_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #endif
@@ -143,12 +143,12 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrs_batch(queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(),
-                                         A_dev_ptrs, lda_vec.data(), B_dev_ptrs, ldb_vec.data(),
-                                         group_count, group_sizes_vec.data(), scratchpad_dev,
-                                         scratchpad_size);
+        oneapi::math::lapack::potrs_batch(queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(),
+                                          A_dev_ptrs, lda_vec.data(), B_dev_ptrs, ldb_vec.data(),
+                                          group_count, group_sizes_vec.data(), scratchpad_dev,
+                                          scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrs_batch, uplo_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs_batch, uplo_vec.data(),
                                   n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   B_dev_ptrs, ldb_vec.data(), group_count, group_sizes_vec.data(),
                                   scratchpad_dev, scratchpad_size);
@@ -209,7 +209,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Test Parameters */
-    std::vector<oneapi::mkl::uplo> uplo_vec = { oneapi::mkl::uplo::upper };
+    std::vector<oneapi::math::uplo> uplo_vec = { oneapi::math::uplo::upper };
     std::vector<int64_t> n_vec = { 1 };
     std::vector<int64_t> nrhs_vec = { 1 };
     std::vector<int64_t> lda_vec = { 1 };
@@ -243,7 +243,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
 
             B_initial_list.emplace_back(ldb * nrhs);
             auto& B_initial = B_initial_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, lda);
 
             B_list.emplace_back(B_initial);
             auto& B = B_list.back();
@@ -277,13 +277,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>(
             queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>,
             uplo_vec.data(), n_vec.data(), nrhs_vec.data(), lda_vec.data(), ldb_vec.data(),
             group_count, group_sizes_vec.data());
 #endif
@@ -308,13 +308,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::potrs_batch(
+        sycl::event func_event = oneapi::math::lapack::potrs_batch(
             queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(),
             B_dev_ptrs, ldb_vec.data(), group_count, group_sizes_vec.data(), scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrs_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrs_batch,
                                   uplo_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs,
                                   lda_vec.data(), B_dev_ptrs, ldb_vec.data(), group_count,
                                   group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
diff --git a/tests/unit_tests/lapack/source/potrs_batch_stride.cpp b/tests/unit_tests/lapack/source/potrs_batch_stride.cpp
index de2568e86..a46f2816b 100644
--- a/tests/unit_tests/lapack/source/potrs_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/potrs_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,8 +40,9 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs, int64_t lda,
-              int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size, uint64_t seed) {
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs,
+              int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size,
+              uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -50,7 +51,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp> B_initial(stride_b * batch_size);
     for (int64_t i = 0; i < batch_size; i++) {
         rand_pos_def_matrix(seed, uplo, n, A_initial, lda, i * stride_a);
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
     }
 
     std::vector<fp> A = A_initial;
@@ -72,12 +73,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto B_dev = device_alloc<data_T>(queue, B.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>(
             queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>, uplo, n,
+            queue, scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>, uplo, n,
             nrhs, lda, stride_a, ldb, stride_b, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -87,10 +88,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, A_dev, lda, stride_a, B_dev, ldb,
-                                         stride_b, batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::potrs_batch(queue, uplo, n, nrhs, A_dev, lda, stride_a, B_dev, ldb,
+                                          stride_b, batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::potrs_batch, uplo, n, nrhs, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs_batch, uplo, n, nrhs, A_dev,
                                   lda, stride_a, B_dev, ldb, stride_b, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -123,7 +124,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t nrhs,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs,
                     int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b,
                     int64_t batch_size, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
@@ -134,7 +135,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp> B_initial(stride_b * batch_size);
     for (int64_t i = 0; i < batch_size; i++) {
         rand_pos_def_matrix(seed, uplo, n, A_initial, lda, i * stride_a);
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B_initial, ldb, i * stride_b);
     }
 
     std::vector<fp> A = A_initial;
@@ -157,12 +158,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto B_dev = device_alloc<data_T>(queue, B.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>(
             queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size<fp>, uplo, n,
+            queue, scratchpad_size = oneapi::math::lapack::potrs_batch_scratchpad_size<fp>, uplo, n,
             nrhs, lda, stride_a, ldb, stride_b, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -174,12 +175,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::potrs_batch(
+        sycl::event func_event = oneapi::math::lapack::potrs_batch(
             queue, uplo, n, nrhs, A_dev, lda, stride_a, B_dev, ldb, stride_b, batch_size,
             scratchpad_dev, scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::potrs_batch, uplo, n,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrs_batch, uplo, n,
                                   nrhs, A_dev, lda, stride_a, B_dev, ldb, stride_b, batch_size,
                                   scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/syevd.cpp b/tests/unit_tests/lapack/source/syevd.cpp
index 291713354..5e013aead 100644
--- a/tests/unit_tests/lapack/source/syevd.cpp
+++ b/tests/unit_tests/lapack/source/syevd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, int64_t n,
+bool accuracy(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::uplo uplo, int64_t n,
               int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -59,11 +59,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::syevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
+            oneapi::math::lapack::syevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::syevd_scratchpad_size<fp>,
                                   jobz, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -72,10 +72,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::syevd, jobz, uplo, n, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::syevd, jobz, uplo, n, A_dev, lda,
                                   w_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -97,7 +97,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
+bool usm_dependency(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::uplo uplo,
                     int64_t n, int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -117,11 +117,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl:
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::syevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
+            oneapi::math::lapack::syevd_scratchpad_size<fp>(queue, jobz, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::syevd_scratchpad_size<fp>,
                                   jobz, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -133,11 +133,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::job jobz, oneapi::mkl:
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::syevd, jobz, uplo, n,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::syevd, jobz, uplo, n,
                                   A_dev, lda, w_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/sygvd.cpp b/tests/unit_tests/lapack/source/sygvd.cpp
index f800b03dd..b1f73ada6 100644
--- a/tests/unit_tests/lapack/source/sygvd.cpp
+++ b/tests/unit_tests/lapack/source/sygvd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -42,8 +42,8 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo,
-              int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
+bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz,
+              oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -65,11 +65,11 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sygvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
+            oneapi::math::lapack::sygvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::sygvd_scratchpad_size<fp>,
                                   itype, jobz, uplo, n, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -79,10 +79,10 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::sygvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::sygvd, itype, jobz, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sygvd, itype, jobz, uplo, n, A_dev,
                                   lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -107,9 +107,9 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
 
     /* |D_ref - D| < |D_ref| O(eps) */
     std::vector<fp_real> D_ref(n);
-    auto info =
-        reference::sygvd(itype, oneapi::mkl::job::novec, uplo, n, std::vector<fp>(A_initial).data(),
-                         lda, std::vector<fp>(B_initial).data(), ldb, D_ref.data());
+    auto info = reference::sygvd(itype, oneapi::math::job::novec, uplo, n,
+                                 std::vector<fp>(A_initial).data(), lda,
+                                 std::vector<fp>(B_initial).data(), ldb, D_ref.data());
     if (0 != info) {
         test_log::lout << "reference sygvd failed with info = " << info << std::endl;
         return false;
@@ -119,17 +119,17 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
         result = false;
     }
 
-    if (oneapi::mkl::job::vec == jobz) {
+    if (oneapi::math::job::vec == jobz) {
         if (itype == 1) {
             /* |A Z - B Z D| < |A Z| O(eps) */
             std::vector<fp> AZ(n * n);
             int64_t ldaz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, Z.data(), ldz, 0.0, AZ.data(), ldaz);
 
             std::vector<fp> BZ(n * n);
             int64_t ldbz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, B_initial.data(), ldb, Z.data(), ldz, 0.0, BZ.data(), ldbz);
 
             std::vector<fp> BZD(n * n);
@@ -146,8 +146,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B Z| < n O(eps) */
             std::vector<fp> ZBZ(n * n);
             int64_t ldzbz = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
             if (!rel_id_err_check(n, ZBZ, ldzbz)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -157,12 +157,12 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |A B Z - Z D| < |A B Z| O(eps) */
             std::vector<fp> BZ(n * n);
             int64_t ldbz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, B_initial.data(), ldb, Z.data(), ldz, 0.0, BZ.data(), ldbz);
 
             std::vector<fp> ABZ(n * n);
             int64_t ldabz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, BZ.data(), ldbz, 0.0, ABZ.data(),
                             ldabz);
 
@@ -180,8 +180,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B Z| < n O(eps) */
             std::vector<fp> ZBZ(n * n);
             int64_t ldzbz = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz);
             if (!rel_id_err_check(n, ZBZ, ldzbz)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -192,7 +192,7 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* C = B^-1 Z */
             std::vector<fp> AZ(n * n);
             int64_t ldaz = n;
-            reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n,
+            reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n,
                             n, n, 1.0, A_initial.data(), lda, Z.data(), ldz, 0.0, AZ.data(), ldaz);
 
             std::vector<fp> C(n * n);
@@ -218,8 +218,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz, one
             /* |I - Z' B^-1 Z| = |I - Z' C| < n O(eps) */
             std::vector<fp> ZhC(n * n);
             int64_t ldzhc = n;
-            reference::gemm(oneapi::mkl::transpose::conjtrans, oneapi::mkl::transpose::nontrans, n,
-                            n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc);
+            reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans,
+                            n, n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc);
             if (!rel_id_err_check(n, ZhC, ldzhc)) {
                 test_log::lout << "Orthogonality check failed" << std::endl;
                 result = false;
@@ -234,8 +234,8 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job jobz,
-                    oneapi::mkl::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
+bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::math::job jobz,
+                    oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
@@ -258,11 +258,11 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job job
         auto w_dev = device_alloc<data_T, fp_real>(queue, w.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sygvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
+            oneapi::math::lapack::sygvd_scratchpad_size<fp>(queue, itype, jobz, uplo, n, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::sygvd_scratchpad_size<fp>,
                                   itype, jobz, uplo, n, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -274,14 +274,14 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::mkl::job job
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::sygvd(
+        sycl::event func_event = oneapi::math::lapack::sygvd(
             queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::sygvd, itype, jobz, uplo,
-                                  n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size,
-                                  std::vector<sycl::event>{ in_event });
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sygvd, itype, jobz,
+                                  uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev,
+                                  scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
 
diff --git a/tests/unit_tests/lapack/source/sytrd.cpp b/tests/unit_tests/lapack/source/sytrd.cpp
index 01ffe0dff..a9cd82d48 100644
--- a/tests/unit_tests/lapack/source/sytrd.cpp
+++ b/tests/unit_tests/lapack/source/sytrd.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -50,7 +50,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp_real> d(n);
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -63,11 +63,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sytrd_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::sytrd_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::sytrd_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -78,10 +78,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::sytrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::sytrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::sytrd, uplo, n, A_dev, lda, d_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sytrd, uplo, n, A_dev, lda, d_dev,
                                   e_dev, tau_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -114,9 +114,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
 
     std::vector<fp> QTQ{ T };
     int64_t ldqtq = n;
-    reference::or_un_mtr(oneapi::mkl::side::left, uplo, oneapi::mkl::transpose::nontrans, n, n,
+    reference::or_un_mtr(oneapi::math::side::left, uplo, oneapi::math::transpose::nontrans, n, n,
                          A.data(), lda, tau.data(), QTQ.data(), ldqtq);
-    reference::or_un_mtr(oneapi::mkl::side::right, uplo, oneapi::mkl::transpose::trans, n, n,
+    reference::or_un_mtr(oneapi::math::side::right, uplo, oneapi::math::transpose::trans, n, n,
                          A.data(), lda, tau.data(), QTQ.data(), ldqtq);
 
     if (!rel_mat_err_check(n, n, QTQ, ldqtq, A_initial, lda)) {
@@ -127,7 +127,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     /* A[i, i] = d[i] */
     for (int64_t diag = 0; diag < n; diag++)
         d[diag] -= A[diag + diag * lda];
-    if (uplo == oneapi::mkl::uplo::upper)
+    if (uplo == oneapi::math::uplo::upper)
         for (int64_t diag = 0; diag < n - 1; diag++)
             e[diag] -= A[diag + (diag + 1) * lda];
     else
@@ -152,7 +152,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -162,7 +162,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp_real> d(n);
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     std::vector<fp> A_initial = A;
 
     /* Compute on device */
@@ -176,11 +176,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sytrd_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::sytrd_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::sytrd_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -193,12 +193,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::sytrd(
+        sycl::event func_event = oneapi::math::lapack::sytrd(
             queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::sytrd, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sytrd, uplo, n, A_dev,
                                   lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/sytrf.cpp b/tests/unit_tests/lapack/source/sytrf.cpp
index 81d7fdb2d..39d17a0d4 100644
--- a/tests/unit_tests/lapack/source/sytrf.cpp
+++ b/tests/unit_tests/lapack/source/sytrf.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -60,11 +60,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sytrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::sytrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::sytrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -73,10 +73,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::sytrf, uplo, n, A_dev, lda, ipiv_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sytrf, uplo, n, A_dev, lda, ipiv_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -101,7 +101,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     for (int64_t d = 0; d < n; d++)
         U[d + d * ldu] = 1.0;
 
-    if (uplo == oneapi::mkl::uplo::upper) {
+    if (uplo == oneapi::math::uplo::upper) {
         int64_t k = n - 1;
         while (k >= 0) {
             reference::laset('A', n, n, 0.0, 1.0, Uk.data(), ldu);
@@ -114,9 +114,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu),
                                     ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 k -= 1;
@@ -132,9 +132,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k - 1 + 0 * ldu), ldu,
                                     Uk.data() + (piv + 0 * ldu), ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 D[k - 1 + (k - 1) * ldd] = A[k - 1 + (k - 1) * lda];
@@ -157,9 +157,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 0 * lda), ldu, Uk.data() + (piv + 0 * ldu),
                                     ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + (k)*ldd] = A[k + (k)*lda];
                 k += 1;
@@ -175,9 +175,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
                     reference::swap(n, Uk.data() + (k + 1 + 0 * ldu), ldu,
                                     Uk.data() + (piv + 0 * ldu), ldu);
                 auto U_temp = U;
-                reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
-                                n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(),
-                                ldu);
+                reference::gemm(oneapi::math::transpose::nontrans,
+                                oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu,
+                                Uk.data(), ldu, 0.0, U.data(), ldu);
 
                 D[k + k * ldd] = A[k + k * lda];
                 D[k + 1 + (k + 1) * ldd] = A[k + 1 + (k + 1) * lda];
@@ -191,12 +191,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     /* |A - UDU'| < |A| O(eps) */
     std::vector<fp> UD(n * n);
     int64_t ldud = n;
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, n, n, n,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, n, n,
                     1.0, U.data(), ldu, D.data(), ldd, 0.0, UD.data(), ldud);
 
     std::vector<fp> UDU(n * n);
     int64_t ldudu = n;
-    reference::gemm(oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, n, n, n, 1.0,
+    reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::trans, n, n, n, 1.0,
                     UD.data(), ldud, U.data(), ldu, 0.0, UDU.data(), ldudu);
 
     if (!rel_mat_err_check(n, n, UDU, ldudu, A_initial, lda)) {
@@ -212,7 +212,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -233,11 +233,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto ipiv_dev = device_alloc<data_T, int64_t>(queue, ipiv.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::sytrf_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::sytrf_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::sytrf_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -249,11 +249,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::sytrf, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sytrf, uplo, n, A_dev,
                                   lda, ipiv_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/trtrs.cpp b/tests/unit_tests/lapack/source/trtrs.cpp
index 4018a2c51..3f847028c 100644
--- a/tests/unit_tests/lapack/source/trtrs.cpp
+++ b/tests/unit_tests/lapack/source/trtrs.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -47,8 +47,8 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-              oneapi::mkl::diag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+              oneapi::math::diag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -58,8 +58,8 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl::tran
     std::vector<fp> B(ldb * nrhs);
 
     /* Initialize input data */
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B, ldb);
     std::vector<fp> B_initial = B;
 
     /* Compute on device */
@@ -70,12 +70,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl::tran
         auto B_dev = device_alloc<data_T>(queue, B.size());
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::trtrs_scratchpad_size<fp>(
             queue, uplo, trans, diag, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::trtrs_scratchpad_size<fp>,
                                   uplo, trans, diag, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -85,10 +85,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl::tran
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, A_dev, lda, B_dev, ldb,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, A_dev, lda, B_dev, ldb,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::trtrs, uplo, trans, diag, n, nrhs,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::trtrs, uplo, trans, diag, n, nrhs,
                                   A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -109,8 +109,8 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans,
-                    oneapi::mkl::diag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, oneapi::math::transpose trans,
+                    oneapi::math::diag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -120,8 +120,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl
     std::vector<fp> B(ldb * nrhs);
 
     /* Initialize input data */
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, nrhs, B, ldb);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, nrhs, B, ldb);
     std::vector<fp> B_initial = B;
 
     /* Compute on device */
@@ -133,12 +133,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl
         auto B_dev = device_alloc<data_T>(queue, B.size());
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::trtrs_scratchpad_size<fp>(
             queue, uplo, trans, diag, n, nrhs, lda, ldb);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::trtrs_scratchpad_size<fp>,
                                   uplo, trans, diag, n, nrhs, lda, ldb);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -150,14 +150,14 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, oneapi::mkl
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::trtrs(
+        sycl::event func_event = oneapi::math::lapack::trtrs(
             queue, uplo, trans, diag, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::trtrs, uplo, trans, diag,
-                                  n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size,
-                                  std::vector<sycl::event>{ in_event });
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::trtrs, uplo, trans,
+                                  diag, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev,
+                                  scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
 
diff --git a/tests/unit_tests/lapack/source/ungbr.cpp b/tests/unit_tests/lapack/source/ungbr.cpp
index 7cdf8e52a..f73085664 100644
--- a/tests/unit_tests/lapack/source/ungbr.cpp
+++ b/tests/unit_tests/lapack/source/ungbr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -43,7 +43,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k,
+bool accuracy(const sycl::device& dev, oneapi::math::generate vect, int64_t m, int64_t n, int64_t k,
               int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -52,9 +52,9 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
     int64_t m_A = m;
     int64_t n_A = n;
 
-    if (vect == oneapi::mkl::generate::Q)
+    if (vect == oneapi::math::generate::Q)
         n_A = k;
-    else /* vect == oneapi::mkl::generate::P */
+    else /* vect == oneapi::math::generate::P */
         m_A = k;
 
     int64_t min_mn_A = std::min<int64_t>(m_A, n_A);
@@ -65,10 +65,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
     std::vector<fp> tauq(min_mn_A);
     std::vector<fp> taup(min_mn_A);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m_A, n_A, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m_A, n_A, A, lda);
     reference::gebrd(m_A, n_A, A.data(), lda, d.data(), e.data(), tauq.data(), taup.data());
 
-    auto& tau = (vect == oneapi::mkl::generate::Q) ? tauq : taup;
+    auto& tau = (vect == oneapi::math::generate::Q) ? tauq : taup;
 
     /* Compute on device */
     {
@@ -79,11 +79,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
+            oneapi::math::lapack::ungbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ungbr_scratchpad_size<fp>,
                                   vect, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -93,10 +93,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, in
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ungbr, vect, m, n, k, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungbr, vect, m, n, k, A_dev, lda,
                                   tau_dev, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -117,7 +117,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t m, int64_t n,
+bool usm_dependency(const sycl::device& dev, oneapi::math::generate vect, int64_t m, int64_t n,
                     int64_t k, int64_t lda, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -126,9 +126,9 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
     int64_t m_A = m;
     int64_t n_A = n;
 
-    if (vect == oneapi::mkl::generate::Q)
+    if (vect == oneapi::math::generate::Q)
         n_A = k;
-    else /* vect == oneapi::mkl::generate::P */
+    else /* vect == oneapi::math::generate::P */
         m_A = k;
 
     int64_t min_mn_A = std::min<int64_t>(m_A, n_A);
@@ -139,10 +139,10 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
     std::vector<fp> tauq(min_mn_A);
     std::vector<fp> taup(min_mn_A);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m_A, n_A, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m_A, n_A, A, lda);
     reference::gebrd(m_A, n_A, A.data(), lda, d.data(), e.data(), tauq.data(), taup.data());
 
-    auto& tau = (vect == oneapi::mkl::generate::Q) ? tauq : taup;
+    auto& tau = (vect == oneapi::math::generate::Q) ? tauq : taup;
 
     /* Compute on device */
     bool result;
@@ -154,11 +154,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
 
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
+            oneapi::math::lapack::ungbr_scratchpad_size<fp>(queue, vect, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::ungbr_scratchpad_size<fp>,
                                   vect, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -171,11 +171,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::generate vect, int64_t
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ungbr, vect, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungbr, vect, m, n, k,
                                   A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/ungqr.cpp b/tests/unit_tests/lapack/source/ungqr.cpp
index 08b8b1192..115b2e5e5 100644
--- a/tests/unit_tests/lapack/source/ungqr.cpp
+++ b/tests/unit_tests/lapack/source/ungqr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -51,7 +51,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
 
     /* Initialize */
     std::vector<fp> A(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(m, k, A.data(), lda, tau.data());
@@ -67,11 +67,11 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungqr_scratchpad_size<fp>(queue, m, n, k, lda);
+            oneapi::math::lapack::ungqr_scratchpad_size<fp>(queue, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size<fp>, m, n, k, lda);
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_scratchpad_size<fp>, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -80,10 +80,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ungqr, m, n, k, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr, m, n, k, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -111,7 +111,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
 
     /* Initialize */
     std::vector<fp> A(lda * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(m, k, A.data(), lda, tau.data());
@@ -128,11 +128,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungqr_scratchpad_size<fp>(queue, m, n, k, lda);
+            oneapi::math::lapack::ungqr_scratchpad_size<fp>(queue, m, n, k, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size<fp>, m, n, k, lda);
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_scratchpad_size<fp>, m, n, k, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -144,11 +144,11 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ungqr, m, n, k, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungqr, m, n, k, A_dev,
                                   lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/ungqr_batch_group.cpp b/tests/unit_tests/lapack/source/ungqr_batch_group.cpp
index ddb350828..9ed03c1cf 100644
--- a/tests/unit_tests/lapack/source/ungqr_batch_group.cpp
+++ b/tests/unit_tests/lapack/source/ungqr_batch_group.cpp
@@ -28,7 +28,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -69,7 +69,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_list.emplace_back(lda * n);
             auto& A = A_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
 
             tau_list.emplace_back(k);
             auto& tau = tau_list.back();
@@ -101,13 +101,13 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #endif
@@ -130,11 +130,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) {
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ungqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(),
-                                         A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count,
-                                         group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::ungqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(),
+                                          A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count,
+                                          group_sizes_vec.data(), scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ungqr_batch, m_vec.data(),
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr_batch, m_vec.data(),
                                   n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(),
                                   tau_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev,
                                   scratchpad_size);
@@ -213,7 +213,7 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         for (int64_t local_id = 0; local_id < group_size; local_id++) {
             A_list.emplace_back(lda * n);
             auto& A = A_list.back();
-            rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda);
+            rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda);
 
             tau_list.emplace_back(k);
             auto& tau = tau_list.back();
@@ -246,13 +246,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         }
 
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>,
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>,
             m_vec.data(), n_vec.data(), k_vec.data(), lda_vec.data(), group_count,
             group_sizes_vec.data());
 #endif
@@ -277,13 +277,13 @@ bool usm_dependency(const sycl::device& dev, uint64_t seed) {
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::ungqr_batch(
+        sycl::event func_event = oneapi::math::lapack::ungqr_batch(
             queue, m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(),
             tau_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev, scratchpad_size,
             std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ungqr_batch,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungqr_batch,
                                   m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs,
                                   lda_vec.data(), tau_dev_ptrs, group_count, group_sizes_vec.data(),
                                   scratchpad_dev, scratchpad_size,
diff --git a/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp b/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp
index e656b9fb7..446bfd8f8 100644
--- a/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp
+++ b/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -50,7 +50,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda, i * stride_a);
         auto info =
             reference::geqrf(m, k, A.data() + i * stride_a, lda, tau.data() + i * stride_tau);
         if (0 != info) {
@@ -67,12 +67,12 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>(
             queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>, m, n, k,
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>, m, n, k,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -82,10 +82,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau,
-                                         batch_size, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::ungqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau,
+                                          batch_size, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ungqr_batch, m, n, k, A_dev, lda,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr_batch, m, n, k, A_dev, lda,
                                   stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
                                   scratchpad_size);
 #endif
@@ -127,7 +127,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
     std::vector<fp> tau(stride_tau * batch_size);
 
     for (int64_t i = 0; i < batch_size; i++) {
-        rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, A, lda, i * stride_a);
+        rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, A, lda, i * stride_a);
         auto info =
             reference::geqrf(m, k, A.data() + i * stride_a, lda, tau.data() + i * stride_tau);
         if (0 != info) {
@@ -145,12 +145,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         auto A_dev = device_alloc<data_T>(queue, A.size());
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>(
             queue, m, n, k, lda, stride_a, stride_tau, batch_size);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size<fp>, m, n, k,
+            queue, scratchpad_size = oneapi::math::lapack::ungqr_batch_scratchpad_size<fp>, m, n, k,
             lda, stride_a, stride_tau, batch_size);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -162,12 +162,12 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::ungqr_batch(
+        sycl::event func_event = oneapi::math::lapack::ungqr_batch(
             queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ungqr_batch, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungqr_batch, m, n, k,
                                   A_dev, lda, stride_a, tau_dev, stride_tau, batch_size,
                                   scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
diff --git a/tests/unit_tests/lapack/source/ungtr.cpp b/tests/unit_tests/lapack/source/ungtr.cpp
index b0ad8e8f2..ed516b03c 100644
--- a/tests/unit_tests/lapack/source/ungtr.cpp
+++ b/tests/unit_tests/lapack/source/ungtr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,7 +40,7 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
               uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -51,7 +51,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     auto info = reference::hetrd(uplo, n, A.data(), lda, d.data(), e.data(), tau.data());
     if (0 != info) {
         test_log::lout << "reference hetrd failed with info = " << info << std::endl;
@@ -66,11 +66,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungtr_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::ungtr_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::ungtr_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -79,10 +79,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                   scratchpad_size);
+        oneapi::math::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                    scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::ungtr, uplo, n, A_dev, lda, tau_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungtr, uplo, n, A_dev, lda, tau_dev,
                                   scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -103,7 +103,7 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t lda,
                     uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
@@ -114,7 +114,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
     std::vector<fp_real> e(n);
     std::vector<fp> tau(n);
 
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
     auto info = reference::hetrd(uplo, n, A.data(), lda, d.data(), e.data(), tau.data());
     if (0 != info) {
         test_log::lout << "reference hetrd failed with info = " << info << std::endl;
@@ -130,11 +130,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
 #ifdef CALL_RT_API
         const auto scratchpad_size =
-            oneapi::mkl::lapack::ungtr_scratchpad_size<fp>(queue, uplo, n, lda);
+            oneapi::math::lapack::ungtr_scratchpad_size<fp>(queue, uplo, n, lda);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(
-            queue, scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size<fp>, uplo, n, lda);
+            queue, scratchpad_size = oneapi::math::lapack::ungtr_scratchpad_size<fp>, uplo, n, lda);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
 
@@ -146,11 +146,11 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t n,
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
         sycl::event func_event =
-            oneapi::mkl::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
-                                       scratchpad_size, std::vector<sycl::event>{ in_event });
+            oneapi::math::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev,
+                                        scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::ungtr, uplo, n, A_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungtr, uplo, n, A_dev,
                                   lda, tau_dev, scratchpad_dev, scratchpad_size,
                                   std::vector<sycl::event>{ in_event });
 #endif
diff --git a/tests/unit_tests/lapack/source/unmqr.cpp b/tests/unit_tests/lapack/source/unmqr.cpp
index 2f555c1ca..c0a147988 100644
--- a/tests/unit_tests/lapack/source/unmqr.cpp
+++ b/tests/unit_tests/lapack/source/unmqr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -47,19 +47,19 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl::transpose trans,
+bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::math::transpose trans,
               int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(nq, k, A.data(), lda, tau.data());
@@ -75,12 +75,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmqr_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmqr_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -91,10 +91,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::unmqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
-                                   ldc, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::unmqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
+                                    ldc, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::unmqr, left_right, trans, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmqr, left_right, trans, m, n, k,
                                   A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -131,20 +131,20 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
-                    oneapi::mkl::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right,
+                    oneapi::math::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
                     int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::geqrf(nq, k, A.data(), lda, tau.data());
@@ -162,12 +162,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmqr_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmqr_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -180,13 +180,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::unmqr(
+        sycl::event func_event = oneapi::math::lapack::unmqr(
             queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::unmqr, left_right, trans,
-                                  m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmqr, left_right,
+                                  trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/unmrq.cpp b/tests/unit_tests/lapack/source/unmrq.cpp
index 628063837..b41d5eda3 100644
--- a/tests/unit_tests/lapack/source/unmrq.cpp
+++ b/tests/unit_tests/lapack/source/unmrq.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,18 +40,18 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl::transpose trans,
+bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::math::transpose trans,
               int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
     int64_t nq;
-    if (left_right == oneapi::mkl::side::left) {
+    if (left_right == oneapi::math::side::left) {
         if (k > m) {
             test_log::lout << "Bad test input, side == left and k > m (" << k << " > " << m << ")"
                            << std::endl;
@@ -69,7 +69,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
     }
 
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::gerqf(nq, k, A.data(), lda, tau.data());
@@ -85,12 +85,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmrq_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmrq_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -101,10 +101,10 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::side left_right, oneapi::mkl
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::unmrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
-                                   ldc, scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::unmrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev,
+                                    ldc, scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::unmrq, left_right, trans, m, n, k,
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmrq, left_right, trans, m, n, k,
                                   A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
@@ -141,20 +141,20 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
-                    oneapi::mkl::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
+bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right,
+                    oneapi::math::transpose trans, int64_t m, int64_t n, int64_t k, int64_t lda,
                     int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
     std::vector<fp> C_initial(ldc * n);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C_initial, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C_initial, ldc);
     std::vector<fp> C = C_initial;
 
-    int64_t nq = (left_right == oneapi::mkl::side::left) ? m : n;
+    int64_t nq = (left_right == oneapi::math::side::left) ? m : n;
     std::vector<fp> A(lda * k);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, nq, k, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, nq, k, A, lda);
     std::vector<fp> tau(k);
 
     auto info = reference::gerqf(nq, k, A.data(), lda, tau.data());
@@ -171,12 +171,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmrq_scratchpad_size<fp>(
             queue, left_right, trans, m, n, k, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmrq_scratchpad_size<fp>,
                                   left_right, trans, m, n, k, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -189,13 +189,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::side left_right,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::unmrq(
+        sycl::event func_event = oneapi::math::lapack::unmrq(
             queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::unmrq, left_right, trans,
-                                  m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmrq, left_right,
+                                  trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/lapack/source/unmtr.cpp b/tests/unit_tests/lapack/source/unmtr.cpp
index 8148c644d..e1f969204 100644
--- a/tests/unit_tests/lapack/source/unmtr.cpp
+++ b/tests/unit_tests/lapack/source/unmtr.cpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 #include "lapack_common.hpp"
 #include "lapack_test_controller.hpp"
 #include "lapack_accuracy_checks.hpp"
@@ -40,17 +40,17 @@ const char* accuracy_input = R"(
 )";
 
 template <typename data_T>
-bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_t n, int64_t lda,
+bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64_t n, int64_t lda,
               int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
-    oneapi::mkl::side side = oneapi::mkl::side::right;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::side side = oneapi::math::side::right;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     std::vector<fp> A(n * lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
 
     std::vector<fp> tau(n);
     std::vector<fp_real> d(n);
@@ -62,7 +62,7 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
     }
 
     std::vector<fp> C(n * ldc);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C, ldc);
     std::vector<fp> C_initial = C;
 
     /* Compute on device */
@@ -73,12 +73,12 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmtr_scratchpad_size<fp>(
             queue, side, uplo, trans, m, n, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmtr_scratchpad_size<fp>,
                                   side, uplo, trans, m, n, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -89,11 +89,11 @@ bool accuracy(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_
         queue.wait_and_throw();
 
 #ifdef CALL_RT_API
-        oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc,
-                                   scratchpad_dev, scratchpad_size);
+        oneapi::math::lapack::unmtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc,
+                                    scratchpad_dev, scratchpad_size);
 #else
-        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::mkl::lapack::unmtr, side, uplo, trans, m, n, A_dev,
-                                  lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
+        TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmtr, side, uplo, trans, m, n,
+                                  A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size);
 #endif
         queue.wait_and_throw();
 
@@ -127,17 +127,17 @@ const char* dependency_input = R"(
 )";
 
 template <typename data_T>
-bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m, int64_t n,
+bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64_t n,
                     int64_t lda, int64_t ldc, uint64_t seed) {
     using fp = typename data_T_info<data_T>::value_type;
     using fp_real = typename complex_info<fp>::real_type;
 
     /* Initialize */
-    oneapi::mkl::side side = oneapi::mkl::side::right;
-    oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
+    oneapi::math::side side = oneapi::math::side::right;
+    oneapi::math::transpose trans = oneapi::math::transpose::nontrans;
 
     std::vector<fp> A(n * lda);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, n, n, A, lda);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, n, n, A, lda);
 
     std::vector<fp> tau(n);
     std::vector<fp_real> d(n);
@@ -149,7 +149,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
     }
 
     std::vector<fp> C(n * ldc);
-    rand_matrix(seed, oneapi::mkl::transpose::nontrans, m, n, C, ldc);
+    rand_matrix(seed, oneapi::math::transpose::nontrans, m, n, C, ldc);
     std::vector<fp> C_initial = C;
 
     /* Compute on device */
@@ -161,12 +161,12 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
         auto tau_dev = device_alloc<data_T>(queue, tau.size());
         auto C_dev = device_alloc<data_T>(queue, C.size());
 #ifdef CALL_RT_API
-        const auto scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size<fp>(
+        const auto scratchpad_size = oneapi::math::lapack::unmtr_scratchpad_size<fp>(
             queue, side, uplo, trans, m, n, lda, ldc);
 #else
         int64_t scratchpad_size;
         TEST_RUN_LAPACK_CT_SELECT(queue,
-                                  scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size<fp>,
+                                  scratchpad_size = oneapi::math::lapack::unmtr_scratchpad_size<fp>,
                                   side, uplo, trans, m, n, lda, ldc);
 #endif
         auto scratchpad_dev = device_alloc<data_T>(queue, scratchpad_size);
@@ -179,13 +179,13 @@ bool usm_dependency(const sycl::device& dev, oneapi::mkl::uplo uplo, int64_t m,
         /* Check dependency handling */
         auto in_event = create_dependency(queue);
 #ifdef CALL_RT_API
-        sycl::event func_event = oneapi::mkl::lapack::unmtr(
+        sycl::event func_event = oneapi::math::lapack::unmtr(
             queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
             scratchpad_size, std::vector<sycl::event>{ in_event });
 #else
         sycl::event func_event;
-        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::mkl::lapack::unmtr, side, uplo, trans,
-                                  m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
+        TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmtr, side, uplo,
+                                  trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev,
                                   scratchpad_size, std::vector<sycl::event>{ in_event });
 #endif
         result = check_dependency(queue, in_event, func_event);
diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp
index fa7dffcc6..a170c45da 100644
--- a/tests/unit_tests/main_test.cpp
+++ b/tests/unit_tests/main_test.cpp
@@ -25,8 +25,8 @@
 #endif
 #include <string>
 #include "test_helper.hpp"
-#include "oneapi/mkl/detail/config.hpp"
-#include "oneapi/mkl.hpp"
+#include "oneapi/math/detail/config.hpp"
+#include "oneapi/math.hpp"
 
 #define MAX_STR 128
 
@@ -112,29 +112,30 @@ int main(int argc, char** argv) {
                     if (unique_devices.find(dev.get_info<sycl::info::device::name>()) ==
                         unique_devices.end()) {
                         unique_devices.insert(dev.get_info<sycl::info::device::name>());
-#if !defined(ONEMKL_ENABLE_MKLCPU_BACKEND) &&             \
-    !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \
-    !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) && !defined(ONEMKL_ENABLE_NETLIB_BACKEND)
+#if !defined(ONEMATH_ENABLE_MKLCPU_BACKEND) &&             \
+    !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \
+    !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && !defined(ONEMATH_ENABLE_NETLIB_BACKEND)
                         if (dev.is_cpu())
                             continue;
 #endif
-#if !defined(ONEMKL_ENABLE_MKLGPU_BACKEND) && \
-    !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU) && !defined(ONEMKL_ENABLE_PORTFFT_BACKEND)
+#if !defined(ONEMATH_ENABLE_MKLGPU_BACKEND) &&             \
+    !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU) && \
+    !defined(ONEMATH_ENABLE_PORTFFT_BACKEND)
                         if (dev.is_gpu() && vendor_id == INTEL_ID)
                             continue;
 #endif
-#if !defined(ONEMKL_ENABLE_CUBLAS_BACKEND) && !defined(ONEMKL_ENABLE_CURAND_BACKEND) && \
-    !defined(ONEMKL_ENABLE_CUSOLVER_BACKEND) &&                                         \
-    !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) &&                              \
-    !defined(ONEMKL_ENABLE_CUFFT_BACKEND) && !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) && \
-    !defined(ONEMKL_ENABLE_CUSPARSE_BACKEND)
+#if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && !defined(ONEMATH_ENABLE_CURAND_BACKEND) && \
+    !defined(ONEMATH_ENABLE_CUSOLVER_BACKEND) &&                                          \
+    !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) &&                               \
+    !defined(ONEMATH_ENABLE_CUFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && \
+    !defined(ONEMATH_ENABLE_CUSPARSE_BACKEND)
                         if (dev.is_gpu() && vendor_id == NVIDIA_ID)
                             continue;
 #endif
-#if !defined(ONEMKL_ENABLE_ROCBLAS_BACKEND) && !defined(ONEMKL_ENABLE_ROCRAND_BACKEND) &&         \
-    !defined(ONEMKL_ENABLE_ROCSOLVER_BACKEND) &&                                                  \
-    !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU) && !defined(ONEMKL_ENABLE_ROCFFT_BACKEND) && \
-    !defined(ONEMKL_ENABLE_PORTFFT_BACKEND)
+#if !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND) && !defined(ONEMATH_ENABLE_ROCRAND_BACKEND) && \
+    !defined(ONEMATH_ENABLE_ROCSOLVER_BACKEND) &&                                           \
+    !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU) &&                                    \
+    !defined(ONEMATH_ENABLE_ROCFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND)
                         if (dev.is_gpu() && vendor_id == AMD_ID)
                             continue;
 #endif
@@ -143,9 +144,9 @@ int main(int argc, char** argv) {
                         if (dev.is_accelerator())
 #else
                         if (!dev.is_accelerator())
+// clang-format on
 #endif
                             local_devices.push_back(dev);
-                        // clang-format on
                     }
                 }
                 catch (std::exception const& e) {
diff --git a/tests/unit_tests/rng/device/include/moments.hpp b/tests/unit_tests/rng/device/include/moments.hpp
index 51fe22bcb..7b360d5c3 100644
--- a/tests/unit_tests/rng/device/include/moments.hpp
+++ b/tests/unit_tests/rng/device/include/moments.hpp
@@ -20,7 +20,7 @@
 /*
 *
 *  Content:
-*       oneapi::mkl::rng::device:: distributions moments test (SYCL interface)
+*       oneapi::math::rng::device:: distributions moments test (SYCL interface)
 *
 *******************************************************************************/
 
@@ -35,7 +35,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/rng/device.hpp"
+#include "oneapi/math/rng/device.hpp"
 
 #include "rng_device_test_common.hpp"
 
@@ -47,19 +47,20 @@ class moments_test {
         // Note: the following methods of discrete distributions require double precision support
         if ((std::is_same_v<
                  Distribution,
-                 oneapi::mkl::rng::device::uniform<
-                     std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>> ||
+                 oneapi::math::rng::device::uniform<
+                     std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>> ||
              std::is_same_v<
                  Distribution,
-                 oneapi::mkl::rng::device::uniform<
-                     std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>> ||
-             std::is_same_v<Distribution, oneapi::mkl::rng::device::poisson<
-                                              std::uint32_t,
-                                              oneapi::mkl::rng::device::poisson_method::devroye>> ||
+                 oneapi::math::rng::device::uniform<
+                     std::int32_t, oneapi::math::rng::device::uniform_method::accurate>> ||
              std::is_same_v<
                  Distribution,
-                 oneapi::mkl::rng::device::poisson<
-                     std::int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>) &&
+                 oneapi::math::rng::device::poisson<
+                     std::uint32_t, oneapi::math::rng::device::poisson_method::devroye>> ||
+             std::is_same_v<
+                 Distribution,
+                 oneapi::math::rng::device::poisson<
+                     std::int32_t, oneapi::math::rng::device::poisson_method::devroye>>) &&
             !queue.get_device().has(sycl::aspect::fp64)) {
             status = test_skipped;
             return;
@@ -78,11 +79,11 @@ class moments_test {
                     size_t id = item.get_id(0);
                     auto multiplier = Engine::vec_size;
                     if constexpr (std::is_same_v<Distribution,
-                                                 oneapi::mkl::rng::device::uniform_bits<uint64_t>>)
+                                                 oneapi::math::rng::device::uniform_bits<uint64_t>>)
                         multiplier *= 2;
                     Engine engine(SEED, id * multiplier);
                     Distribution distr;
-                    auto res = oneapi::mkl::rng::device::generate(distr, engine);
+                    auto res = oneapi::math::rng::device::generate(distr, engine);
                     if constexpr (Engine::vec_size == 1) {
                         acc[id] = res;
                     }
@@ -93,7 +94,7 @@ class moments_test {
             });
             event.wait_and_throw();
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -107,7 +108,7 @@ class moments_test {
 
         // validation (statistics check is turned out for mcg59)
         if constexpr (!std::is_same<Engine,
-                                    oneapi::mkl::rng::device::mcg59<Engine::vec_size>>::value) {
+                                    oneapi::math::rng::device::mcg59<Engine::vec_size>>::value) {
             statistics_device<Distribution> stat;
             status = stat.check(r, Distribution{});
         }
diff --git a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp
index 74c6ba503..5e373e2cf 100644
--- a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp
+++ b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp
@@ -170,10 +170,10 @@ template <typename Distribution>
 struct statistics_device {};
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::uniform<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::uniform<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform<Fp, Method>& distr) {
+               const oneapi::math::rng::device::uniform<Fp, Method>& distr) {
         double tM, tD, tQ;
         Fp a = distr.a();
         Fp b = distr.b();
@@ -188,10 +188,10 @@ struct statistics_device<oneapi::mkl::rng::device::uniform<Fp, Method>> {
 };
 
 template <typename Method>
-struct statistics_device<oneapi::mkl::rng::device::uniform<std::int32_t, Method>> {
+struct statistics_device<oneapi::math::rng::device::uniform<std::int32_t, Method>> {
     template <typename AllocType>
     bool check(const std::vector<std::int32_t, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform<std::int32_t, Method>& distr) {
+               const oneapi::math::rng::device::uniform<std::int32_t, Method>& distr) {
         double tM, tD, tQ;
         double a = distr.a();
         double b = distr.b();
@@ -207,10 +207,10 @@ struct statistics_device<oneapi::mkl::rng::device::uniform<std::int32_t, Method>
 };
 
 template <typename Method>
-struct statistics_device<oneapi::mkl::rng::device::uniform<std::uint32_t, Method>> {
+struct statistics_device<oneapi::math::rng::device::uniform<std::uint32_t, Method>> {
     template <typename AllocType>
     bool check(const std::vector<std::uint32_t, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform<std::uint32_t, Method>& distr) {
+               const oneapi::math::rng::device::uniform<std::uint32_t, Method>& distr) {
         double tM, tD, tQ;
         double a = distr.a();
         double b = distr.b();
@@ -226,10 +226,10 @@ struct statistics_device<oneapi::mkl::rng::device::uniform<std::uint32_t, Method
 };
 
 template <typename Method>
-struct statistics_device<oneapi::mkl::rng::device::uniform<std::int64_t, Method>> {
+struct statistics_device<oneapi::math::rng::device::uniform<std::int64_t, Method>> {
     template <typename AllocType>
     bool check(const std::vector<std::int64_t, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform<std::int64_t, Method>& distr) {
+               const oneapi::math::rng::device::uniform<std::int64_t, Method>& distr) {
         double tM, tD, tQ;
         double a = distr.a();
         double b = distr.b();
@@ -245,10 +245,10 @@ struct statistics_device<oneapi::mkl::rng::device::uniform<std::int64_t, Method>
 };
 
 template <typename Method>
-struct statistics_device<oneapi::mkl::rng::device::uniform<std::uint64_t, Method>> {
+struct statistics_device<oneapi::math::rng::device::uniform<std::uint64_t, Method>> {
     template <typename AllocType>
     bool check(const std::vector<std::uint64_t, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform<std::uint64_t, Method>& distr) {
+               const oneapi::math::rng::device::uniform<std::uint64_t, Method>& distr) {
         double tM, tD, tQ;
         double a = distr.a();
         double b = distr.b();
@@ -264,10 +264,10 @@ struct statistics_device<oneapi::mkl::rng::device::uniform<std::uint64_t, Method
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::gaussian<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::gaussian<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::gaussian<Fp, Method>& distr) {
+               const oneapi::math::rng::device::gaussian<Fp, Method>& distr) {
         double tM, tD, tQ;
         Fp a = distr.mean();
         Fp sigma = distr.stddev();
@@ -282,10 +282,10 @@ struct statistics_device<oneapi::mkl::rng::device::gaussian<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::lognormal<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::lognormal<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::lognormal<Fp, Method>& distr) {
+               const oneapi::math::rng::device::lognormal<Fp, Method>& distr) {
         double tM, tD, tQ;
         Fp a = distr.m();
         Fp b = distr.displ();
@@ -304,10 +304,10 @@ struct statistics_device<oneapi::mkl::rng::device::lognormal<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::exponential<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::exponential<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::exponential<Fp, Method>& distr) {
+               const oneapi::math::rng::device::exponential<Fp, Method>& distr) {
         double tM, tD, tQ;
         Fp a = distr.a();
         Fp beta = distr.beta();
@@ -321,10 +321,10 @@ struct statistics_device<oneapi::mkl::rng::device::exponential<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::poisson<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::poisson<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::poisson<Fp, Method>& distr) {
+               const oneapi::math::rng::device::poisson<Fp, Method>& distr) {
         double tM, tD, tQ;
         double lambda = distr.lambda();
 
@@ -337,10 +337,10 @@ struct statistics_device<oneapi::mkl::rng::device::poisson<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::bernoulli<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::bernoulli<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::bernoulli<Fp, Method>& distr) {
+               const oneapi::math::rng::device::bernoulli<Fp, Method>& distr) {
         double tM, tD, tQ;
         double p = static_cast<double>(distr.p());
 
@@ -353,10 +353,10 @@ struct statistics_device<oneapi::mkl::rng::device::bernoulli<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::beta<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::beta<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::beta<Fp, Method>& distr) {
+               const oneapi::math::rng::device::beta<Fp, Method>& distr) {
         double tM, tD, tQ;
         double b, c, d, e, e2, b2, sum_pq;
         Fp p = distr.p();
@@ -381,10 +381,10 @@ struct statistics_device<oneapi::mkl::rng::device::beta<Fp, Method>> {
 };
 
 template <typename Fp, typename Method>
-struct statistics_device<oneapi::mkl::rng::device::gamma<Fp, Method>> {
+struct statistics_device<oneapi::math::rng::device::gamma<Fp, Method>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::gamma<Fp, Method>& distr) {
+               const oneapi::math::rng::device::gamma<Fp, Method>& distr) {
         double tM, tD, tQ;
         Fp a = distr.a();
         Fp alpha = distr.alpha();
@@ -399,19 +399,19 @@ struct statistics_device<oneapi::mkl::rng::device::gamma<Fp, Method>> {
 };
 
 template <typename Fp>
-struct statistics_device<oneapi::mkl::rng::device::bits<Fp>> {
+struct statistics_device<oneapi::math::rng::device::bits<Fp>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::bits<Fp>& distr) {
+               const oneapi::math::rng::device::bits<Fp>& distr) {
         return true;
     }
 };
 
 template <typename Fp>
-struct statistics_device<oneapi::mkl::rng::device::uniform_bits<Fp>> {
+struct statistics_device<oneapi::math::rng::device::uniform_bits<Fp>> {
     template <typename AllocType>
     bool check(const std::vector<Fp, AllocType>& r,
-               const oneapi::mkl::rng::device::uniform_bits<Fp>& distr) {
+               const oneapi::math::rng::device::uniform_bits<Fp>& distr) {
         return true;
     }
 };
@@ -420,6 +420,6 @@ template <typename Engine>
 struct is_mcg59 : std::false_type {};
 
 template <std::int32_t VecSize>
-struct is_mcg59<oneapi::mkl::rng::device::mcg59<VecSize>> : std::true_type {};
+struct is_mcg59<oneapi::math::rng::device::mcg59<VecSize>> : std::true_type {};
 
 #endif // _RNG_DEVICE_TEST_COMMON_HPP__
diff --git a/tests/unit_tests/rng/device/include/skip_ahead_test.hpp b/tests/unit_tests/rng/device/include/skip_ahead_test.hpp
index 0b3bcf8a7..775e57031 100644
--- a/tests/unit_tests/rng/device/include/skip_ahead_test.hpp
+++ b/tests/unit_tests/rng/device/include/skip_ahead_test.hpp
@@ -20,7 +20,7 @@
 /*
 *
 *  Content:
-*       oneapi::mkl::rng::device:: engines skip_ahead and skip_ahead_ex tests
+*       oneapi::math::rng::device:: engines skip_ahead and skip_ahead_ex tests
 *       (SYCL interface)
 *
 *******************************************************************************/
@@ -38,7 +38,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl/rng/device.hpp"
+#include "oneapi/math/rng/device.hpp"
 
 #include "rng_device_test_common.hpp"
 
@@ -61,9 +61,9 @@ class skip_ahead_test {
                 cgh.parallel_for(range, [=](sycl::item<1> item) {
                     size_t id = item.get_id(0);
                     Engine engine(SEED);
-                    oneapi::mkl::rng::device::skip_ahead(engine, id * Engine::vec_size);
-                    oneapi::mkl::rng::device::bits<UIntType> distr;
-                    auto res = oneapi::mkl::rng::device::generate(distr, engine);
+                    oneapi::math::rng::device::skip_ahead(engine, id * Engine::vec_size);
+                    oneapi::math::rng::device::bits<UIntType> distr;
+                    auto res = oneapi::math::rng::device::generate(distr, engine);
                     if constexpr (Engine::vec_size == 1) {
                         acc[id] = res;
                     }
@@ -74,7 +74,7 @@ class skip_ahead_test {
             });
             event.wait_and_throw();
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -88,9 +88,9 @@ class skip_ahead_test {
 
         // validation
         Engine engine(SEED);
-        oneapi::mkl::rng::device::bits<UIntType> distr;
+        oneapi::math::rng::device::bits<UIntType> distr;
         for (int i = 0; i < N_GEN; i += Engine::vec_size) {
-            auto res = oneapi::mkl::rng::device::generate(distr, engine);
+            auto res = oneapi::math::rng::device::generate(distr, engine);
             if constexpr (Engine::vec_size == 1) {
                 r_ref[i] = res;
             }
@@ -125,10 +125,10 @@ class skip_ahead_ex_test {
                 cgh.parallel_for(range, [=](sycl::item<1> item) {
                     size_t id = item.get_id(0);
                     Engine engine(SEED);
-                    oneapi::mkl::rng::device::skip_ahead(engine,
-                                                         { id * Engine::vec_size, skip_num });
-                    oneapi::mkl::rng::device::bits<> distr;
-                    auto res = oneapi::mkl::rng::device::generate(distr, engine);
+                    oneapi::math::rng::device::skip_ahead(engine,
+                                                          { id * Engine::vec_size, skip_num });
+                    oneapi::math::rng::device::bits<> distr;
+                    auto res = oneapi::math::rng::device::generate(distr, engine);
                     if constexpr (Engine::vec_size == 1) {
                         acc[id] = res;
                     }
@@ -139,7 +139,7 @@ class skip_ahead_ex_test {
             });
             event.wait_and_throw();
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -154,11 +154,11 @@ class skip_ahead_ex_test {
         // validation
         Engine engine(SEED);
         for (int j = 0; j < SKIP_TIMES; j++) {
-            oneapi::mkl::rng::device::skip_ahead(engine, N_SKIP);
+            oneapi::math::rng::device::skip_ahead(engine, N_SKIP);
         }
-        oneapi::mkl::rng::device::bits<> distr;
+        oneapi::math::rng::device::bits<> distr;
         for (int i = 0; i < N_GEN; i += Engine::vec_size) {
-            auto res = oneapi::mkl::rng::device::generate(distr, engine);
+            auto res = oneapi::math::rng::device::generate(distr, engine);
             if constexpr (Engine::vec_size == 1) {
                 r_ref[i] = res;
             }
diff --git a/tests/unit_tests/rng/device/moments/CMakeLists.txt b/tests/unit_tests/rng/device/moments/CMakeLists.txt
index 2da8033bf..e486955da 100644
--- a/tests/unit_tests/rng/device/moments/CMakeLists.txt
+++ b/tests/unit_tests/rng/device/moments/CMakeLists.txt
@@ -32,9 +32,9 @@ target_include_directories(rng_device_moments_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET rng_device_moments_ct SOURCES ${MOMENTS_DEVICE_TESTS_SOURCES})
 else()
-  target_link_libraries(rng_device_moments_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(rng_device_moments_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
-if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
   target_link_options(rng_device_moments_ct PUBLIC -fsycl -fsycl-device-code-split=per_kernel)
 endif()
diff --git a/tests/unit_tests/rng/device/moments/moments.cpp b/tests/unit_tests/rng/device/moments/moments.cpp
index 3ae45d657..a191b67df 100644
--- a/tests/unit_tests/rng/device/moments/moments.cpp
+++ b/tests/unit_tests/rng/device/moments/moments.cpp
@@ -30,19 +30,19 @@ class Philox4x32x10UniformStdDeviceMomentsTests : public ::testing::TestWithPara
 class Philox4x32x10UniformAccDeviceMomentsTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -50,61 +50,61 @@ TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -113,21 +113,21 @@ TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, Integer64Precision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -136,39 +136,39 @@ TEST_P(Philox4x32x10UniformStdDeviceMomentsTests, UnsignedInteger64Precision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -176,61 +176,61 @@ TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -239,21 +239,21 @@ TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, Integer64Precision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -262,21 +262,21 @@ TEST_P(Philox4x32x10UniformAccDeviceMomentsTests, UnsignedInteger64Precision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint64_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint64_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -294,19 +294,19 @@ class Mrg32k3aUniformStdDeviceMomentsTests : public ::testing::TestWithParam<syc
 class Mrg32k3aUniformAccDeviceMomentsTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mrg32k3aUniformStdDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -314,79 +314,79 @@ TEST_P(Mrg32k3aUniformStdDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mrg32k3aUniformStdDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aUniformStdDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aUniformStdDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aUniformAccDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -394,61 +394,61 @@ TEST_P(Mrg32k3aUniformAccDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mrg32k3aUniformAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aUniformAccDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aUniformAccDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mrg32k3a<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mrg32k3a<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -466,19 +466,19 @@ class Mcg31m1UniformStdDeviceMomentsTests : public ::testing::TestWithParam<sycl
 class Mcg31m1UniformAccDeviceMomentsTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mcg31m1UniformStdDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -486,79 +486,79 @@ TEST_P(Mcg31m1UniformStdDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mcg31m1UniformStdDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg31m1UniformStdDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg31m1UniformStdDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg31m1UniformAccDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -566,61 +566,61 @@ TEST_P(Mcg31m1UniformAccDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mcg31m1UniformAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg31m1UniformAccDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg31m1UniformAccDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg31m1<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg31m1<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -638,19 +638,19 @@ class Mcg59UniformStdDeviceMomentsTests : public ::testing::TestWithParam<sycl::
 class Mcg59UniformAccDeviceMomentsTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mcg59UniformStdDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -658,79 +658,79 @@ TEST_P(Mcg59UniformStdDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mcg59UniformStdDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::standard>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg59UniformStdDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg59UniformStdDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::standard>>>
+        moments_test<oneapi::math::rng::device::mcg59<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::standard>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg59UniformAccDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     float, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     float, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -738,61 +738,61 @@ TEST_P(Mcg59UniformAccDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Mcg59UniformAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<1>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<4>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                                 oneapi::mkl::rng::device::uniform<
-                                     double, oneapi::mkl::rng::device::uniform_method::accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::mcg59<16>,
+                                 oneapi::math::rng::device::uniform<
+                                     double, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg59UniformAccDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::int32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::int32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mcg59UniformAccDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<1>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<1>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<4>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<4>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::mcg59<16>,
-                     oneapi::mkl::rng::device::uniform<
-                         std::uint32_t, oneapi::mkl::rng::device::uniform_method::accurate>>>
+        moments_test<oneapi::math::rng::device::mcg59<16>,
+                     oneapi::math::rng::device::uniform<
+                         std::uint32_t, oneapi::math::rng::device::uniform_method::accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -806,16 +806,16 @@ INSTANTIATE_TEST_SUITE_P(Mcg59UniformAccDeviceMomentsTestsSuite, Mcg59UniformAcc
 class Philox4x32x10BitsDeviceMomentsTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10BitsDeviceMomentsTests, UnsignedIntegerPrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::bits<uint32_t>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::bits<uint32_t>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::bits<uint32_t>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -828,31 +828,31 @@ class Philox4x32x10UniformBitsDeviceMomentsTests : public ::testing::TestWithPar
 };
 
 TEST_P(Philox4x32x10UniformBitsDeviceMomentsTests, UnsignedIntegerPrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform_bits<uint32_t>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform_bits<uint32_t>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint32_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform_bits<uint32_t>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10UniformBitsDeviceMomentsTests, UnsignedLongIntegerPrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint64_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::uniform_bits<uint64_t>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint64_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::uniform_bits<uint64_t>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::uniform_bits<uint64_t>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::uniform_bits<uint64_t>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -869,39 +869,39 @@ TEST_P(Philox4x32x10GaussianBoxMuller2DeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::gaussian<
-                         float, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::gaussian<
+                         float, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::gaussian<
-                         float, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::gaussian<
+                         float, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::gaussian<
-                         float, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::gaussian<
+                         float, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::gaussian<
-                         double, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::gaussian<
+                         double, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test4;
     EXPECT_TRUEORSKIP((test4(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::gaussian<
-                         double, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::gaussian<
+                         double, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test5;
     EXPECT_TRUEORSKIP((test5(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::gaussian<
-                         double, oneapi::mkl::rng::device::gaussian_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::gaussian<
+                         double, oneapi::math::rng::device::gaussian_method::box_muller2>>>
         test6;
     EXPECT_TRUEORSKIP((test6(GetParam())));
 }
@@ -915,21 +915,21 @@ class Philox4x32x10LognormalBoxMuller2DeviceMomentsTests
 
 TEST_P(Philox4x32x10LognormalBoxMuller2DeviceMomentsTests, RealSinglePrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::lognormal<
-                         float, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::lognormal<
+                         float, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::lognormal<
-                         float, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::lognormal<
+                         float, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::lognormal<
-                         float, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::lognormal<
+                         float, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -938,21 +938,21 @@ TEST_P(Philox4x32x10LognormalBoxMuller2DeviceMomentsTests, RealDoublePrecision)
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::lognormal<
-                         double, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::lognormal<
+                         double, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::lognormal<
-                         double, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::lognormal<
+                         double, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::lognormal<
-                         double, oneapi::mkl::rng::device::lognormal_method::box_muller2>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::lognormal<
+                         double, oneapi::math::rng::device::lognormal_method::box_muller2>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -971,34 +971,34 @@ class Philox4x32x10ExponentialIcdfAccDeviceMomentsTests
 TEST_P(Philox4x32x10ExponentialIcdfDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     float, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::exponential<
+                                     float, oneapi::math::rng::device::exponential_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     float, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::exponential<
+                                     float, oneapi::math::rng::device::exponential_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     float, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::exponential<
+                                     float, oneapi::math::rng::device::exponential_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     double, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::exponential<
+                                     double, oneapi::math::rng::device::exponential_method::icdf>>>
         test4;
     EXPECT_TRUEORSKIP((test4(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     double, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::exponential<
+                                     double, oneapi::math::rng::device::exponential_method::icdf>>>
         test5;
     EXPECT_TRUEORSKIP((test5(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::exponential<
-                                     double, oneapi::mkl::rng::device::exponential_method::icdf>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::exponential<
+                                     double, oneapi::math::rng::device::exponential_method::icdf>>>
         test6;
     EXPECT_TRUEORSKIP((test6(GetParam())));
 }
@@ -1008,39 +1008,39 @@ TEST_P(Philox4x32x10ExponentialIcdfAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::exponential<
-                         float, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::exponential<
+                         float, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::exponential<
-                         float, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::exponential<
+                         float, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::exponential<
-                         float, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::exponential<
+                         float, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::exponential<
-                         double, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::exponential<
+                         double, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test4;
     EXPECT_TRUEORSKIP((test4(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::exponential<
-                         double, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::exponential<
+                         double, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test5;
     EXPECT_TRUEORSKIP((test5(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::exponential<
-                         double, oneapi::mkl::rng::device::exponential_method::icdf_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::exponential<
+                         double, oneapi::math::rng::device::exponential_method::icdf_accurate>>>
         test6;
     EXPECT_TRUEORSKIP((test6(GetParam())));
 }
@@ -1059,18 +1059,18 @@ class Philox4x32x10BetaCjaAccDeviceMomentsTests : public ::testing::TestWithPara
 
 TEST_P(Philox4x32x10BetaCjaDeviceMomentsTests, RealSinglePrecision) {
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<1>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<1>,
+        oneapi::math::rng::device::beta<float, oneapi::math::rng::device::beta_method::cja>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<4>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<4>,
+        oneapi::math::rng::device::beta<float, oneapi::math::rng::device::beta_method::cja>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<16>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<16>,
+        oneapi::math::rng::device::beta<float, oneapi::math::rng::device::beta_method::cja>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1079,36 +1079,36 @@ TEST_P(Philox4x32x10BetaCjaDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<1>,
-        oneapi::mkl::rng::device::beta<double, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<1>,
+        oneapi::math::rng::device::beta<double, oneapi::math::rng::device::beta_method::cja>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<4>,
-        oneapi::mkl::rng::device::beta<double, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<4>,
+        oneapi::math::rng::device::beta<double, oneapi::math::rng::device::beta_method::cja>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<16>,
-        oneapi::mkl::rng::device::beta<double, oneapi::mkl::rng::device::beta_method::cja>>>
+        oneapi::math::rng::device::philox4x32x10<16>,
+        oneapi::math::rng::device::beta<double, oneapi::math::rng::device::beta_method::cja>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BetaCjaAccDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<1>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::beta<
+                                     float, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<4>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::beta<
+                                     float, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<16>,
-        oneapi::mkl::rng::device::beta<float, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::beta<
+                                     float, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1116,19 +1116,19 @@ TEST_P(Philox4x32x10BetaCjaAccDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Philox4x32x10BetaCjaAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::beta<
-                                     double, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::beta<
+                                     double, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::beta<
-                                     double, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::beta<
+                                     double, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::beta<
-                                     double, oneapi::mkl::rng::device::beta_method::cja_accurate>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::beta<
+                                     double, oneapi::math::rng::device::beta_method::cja_accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1148,19 +1148,19 @@ class Philox4x32x10GammaMarsagliaAccDeviceMomentsTests
         : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealSinglePrecision) {
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<1>,
-        oneapi::mkl::rng::device::gamma<float, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::gamma<
+                                     float, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<4>,
-        oneapi::mkl::rng::device::gamma<float, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::gamma<
+                                     float, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<16>,
-        oneapi::mkl::rng::device::gamma<float, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::gamma<
+                                     float, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1168,40 +1168,40 @@ TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealSinglePrecision) {
 TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<1>,
-        oneapi::mkl::rng::device::gamma<double, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::gamma<
+                                     double, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<4>,
-        oneapi::mkl::rng::device::gamma<double, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::gamma<
+                                     double, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<
-        oneapi::mkl::rng::device::philox4x32x10<16>,
-        oneapi::mkl::rng::device::gamma<double, oneapi::mkl::rng::device::gamma_method::marsaglia>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::gamma<
+                                     double, oneapi::math::rng::device::gamma_method::marsaglia>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10GammaMarsagliaAccDeviceMomentsTests, RealSinglePrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::gamma<
-                         float, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::gamma<
+                         float, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::gamma<
-                         float, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::gamma<
+                         float, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::gamma<
-                         float, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::gamma<
+                         float, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1210,21 +1210,21 @@ TEST_P(Philox4x32x10GammaMarsagliaAccDeviceMomentsTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::gamma<
-                         double, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::gamma<
+                         double, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::gamma<
-                         double, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::gamma<
+                         double, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::gamma<
-                         double, oneapi::mkl::rng::device::gamma_method::marsaglia_accurate>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::gamma<
+                         double, oneapi::math::rng::device::gamma_method::marsaglia_accurate>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1241,40 +1241,40 @@ class Philox4x32x10PoissonDevroyeDeviceMomentsTests
         : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10PoissonDevroyeDeviceMomentsTests, IntegerPrecision) {
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                                 oneapi::mkl::rng::device::poisson<
-                                     int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                                 oneapi::math::rng::device::poisson<
+                                     int32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                                 oneapi::mkl::rng::device::poisson<
-                                     int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                                 oneapi::math::rng::device::poisson<
+                                     int32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                                 oneapi::mkl::rng::device::poisson<
-                                     int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+    rng_device_test<moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                                 oneapi::math::rng::device::poisson<
+                                     int32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10PoissonDevroyeDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::poisson<
-                         std::uint32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::poisson<
+                         std::uint32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::poisson<
-                         std::uint32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::poisson<
+                         std::uint32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::poisson<
-                         std::uint32_t, oneapi::mkl::rng::device::poisson_method::devroye>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::poisson<
+                         std::uint32_t, oneapi::math::rng::device::poisson_method::devroye>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
@@ -1288,126 +1288,126 @@ class Philox4x32x10BernoulliIcdfDeviceMomentsTests
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, IntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, UnsignedIntegerPrecision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint32_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint32_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, Integer8Precision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, UnsignedInteger8Precision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint8_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint8_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, Integer16Precision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::int16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::int16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10BernoulliIcdfDeviceMomentsTests, UnsignedInteger16Precision) {
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<1>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<1>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<4>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<4>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
     rng_device_test<
-        moments_test<oneapi::mkl::rng::device::philox4x32x10<16>,
-                     oneapi::mkl::rng::device::bernoulli<
-                         std::uint16_t, oneapi::mkl::rng::device::bernoulli_method::icdf>>>
+        moments_test<oneapi::math::rng::device::philox4x32x10<16>,
+                     oneapi::math::rng::device::bernoulli<
+                         std::uint16_t, oneapi::math::rng::device::bernoulli_method::icdf>>>
         test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
diff --git a/tests/unit_tests/rng/device/service/CMakeLists.txt b/tests/unit_tests/rng/device/service/CMakeLists.txt
index 03d960e1a..f009a5c7e 100644
--- a/tests/unit_tests/rng/device/service/CMakeLists.txt
+++ b/tests/unit_tests/rng/device/service/CMakeLists.txt
@@ -32,9 +32,9 @@ target_include_directories(rng_device_service_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET rng_device_service_ct SOURCES ${SERVICE_DEVICE_TESTS_SOURCES})
 else()
-  target_link_libraries(rng_device_service_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(rng_device_service_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
 
-if(NOT ${ONEMKL_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
+if(NOT ${ONEMATH_SYCL_IMPLEMENTATION} STREQUAL "hipsycl")
   target_link_options(rng_device_service_ct PUBLIC -fsycl -fsycl-device-code-split=per_kernel)
 endif()
diff --git a/tests/unit_tests/rng/device/service/skip_ahead.cpp b/tests/unit_tests/rng/device/service/skip_ahead.cpp
index a5dfe0da8..662c56cff 100644
--- a/tests/unit_tests/rng/device/service/skip_ahead.cpp
+++ b/tests/unit_tests/rng/device/service/skip_ahead.cpp
@@ -30,20 +30,20 @@ class Philox4x32x10DeviceSkipAheadTests : public ::testing::TestWithParam<sycl::
 class Philox4x32x10DeviceSkipAheadExTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10DeviceSkipAheadTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::philox4x32x10<1>>> test1;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::philox4x32x10<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::philox4x32x10<4>>> test2;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::philox4x32x10<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::philox4x32x10<16>>> test3;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::philox4x32x10<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Philox4x32x10DeviceSkipAheadExTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::philox4x32x10<1>>> test1;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::philox4x32x10<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::philox4x32x10<4>>> test2;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::philox4x32x10<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::philox4x32x10<16>>> test3;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::philox4x32x10<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
@@ -59,20 +59,20 @@ class Mrg32k3aDeviceSkipAheadTests : public ::testing::TestWithParam<sycl::devic
 class Mrg32k3aDeviceSkipAheadExTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mrg32k3aDeviceSkipAheadTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mrg32k3a<1>>> test1;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mrg32k3a<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mrg32k3a<4>>> test2;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mrg32k3a<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mrg32k3a<16>>> test3;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mrg32k3a<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
 TEST_P(Mrg32k3aDeviceSkipAheadExTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::mrg32k3a<1>>> test1;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::mrg32k3a<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::mrg32k3a<4>>> test2;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::mrg32k3a<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_ex_test<oneapi::mkl::rng::device::mrg32k3a<16>>> test3;
+    rng_device_test<skip_ahead_ex_test<oneapi::math::rng::device::mrg32k3a<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
@@ -85,11 +85,11 @@ INSTANTIATE_TEST_SUITE_P(Mrg32k3aDeviceSkipAheadExTestsSuite, Mrg32k3aDeviceSkip
 class Mcg31m1DeviceSkipAheadTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mcg31m1DeviceSkipAheadTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg31m1<1>>> test1;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg31m1<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg31m1<4>>> test2;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg31m1<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg31m1<16>>> test3;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg31m1<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
@@ -99,11 +99,11 @@ INSTANTIATE_TEST_SUITE_P(Mcg31m1DeviceSkipAheadTestsSuite, Mcg31m1DeviceSkipAhea
 class Mcg59DeviceSkipAheadTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mcg59DeviceSkipAheadTests, BinaryPrecision) {
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg59<1>>> test1;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg59<1>>> test1;
     EXPECT_TRUEORSKIP((test1(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg59<4>>> test2;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg59<4>>> test2;
     EXPECT_TRUEORSKIP((test2(GetParam())));
-    rng_device_test<skip_ahead_test<oneapi::mkl::rng::device::mcg59<16>>> test3;
+    rng_device_test<skip_ahead_test<oneapi::math::rng::device::mcg59<16>>> test3;
     EXPECT_TRUEORSKIP((test3(GetParam())));
 }
 
diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp
index 2469c3023..0acdd4b98 100644
--- a/tests/unit_tests/rng/include/engines_api_tests.hpp
+++ b/tests/unit_tests/rng/include/engines_api_tests.hpp
@@ -30,7 +30,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "rng_test_common.hpp"
 
@@ -52,20 +52,20 @@ class engines_constructors_test {
             Engine engine3(engine1);
             Engine engine4 = std::move(Engine(queue, SEED));
 
-            oneapi::mkl::rng::bits<std::uint32_t> distr;
+            oneapi::math::rng::bits<std::uint32_t> distr;
 
             sycl::buffer<std::uint32_t, 1> r1_buffer(r1.data(), r1.size());
             sycl::buffer<std::uint32_t, 1> r2_buffer(r2.data(), r2.size());
             sycl::buffer<std::uint32_t, 1> r3_buffer(r3.data(), r3.size());
             sycl::buffer<std::uint32_t, 1> r4_buffer(r4.data(), r4.size());
 
-            oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer);
-            oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer);
-            oneapi::mkl::rng::generate(distr, engine3, N_GEN, r3_buffer);
-            oneapi::mkl::rng::generate(distr, engine4, N_GEN, r4_buffer);
+            oneapi::math::rng::generate(distr, engine1, N_GEN, r1_buffer);
+            oneapi::math::rng::generate(distr, engine2, N_GEN, r2_buffer);
+            oneapi::math::rng::generate(distr, engine3, N_GEN, r3_buffer);
+            oneapi::math::rng::generate(distr, engine4, N_GEN, r4_buffer);
             QUEUE_WAIT(queue);
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -99,13 +99,13 @@ class engines_copy_test {
             Engine engine1(queue, SEED);
             Engine engine2(engine1);
 
-            oneapi::mkl::rng::bits<std::uint32_t> distr;
+            oneapi::math::rng::bits<std::uint32_t> distr;
             {
                 sycl::buffer<std::uint32_t, 1> r1_buffer(r1.data(), r1.size());
                 sycl::buffer<std::uint32_t, 1> r2_buffer(r2.data(), r2.size());
 
-                oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer);
-                oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer);
+                oneapi::math::rng::generate(distr, engine1, N_GEN, r1_buffer);
+                oneapi::math::rng::generate(distr, engine2, N_GEN, r2_buffer);
             }
 
             Engine engine3 = engine1;
@@ -115,13 +115,13 @@ class engines_copy_test {
                 sycl::buffer<std::uint32_t, 1> r2_buffer(r2.data(), r2.size());
                 sycl::buffer<std::uint32_t, 1> r3_buffer(r3.data(), r3.size());
 
-                oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer);
-                oneapi::mkl::rng::generate(distr, engine3, N_GEN, r2_buffer);
-                oneapi::mkl::rng::generate(distr, engine4, N_GEN, r3_buffer);
+                oneapi::math::rng::generate(distr, engine1, N_GEN, r1_buffer);
+                oneapi::math::rng::generate(distr, engine3, N_GEN, r2_buffer);
+                oneapi::math::rng::generate(distr, engine4, N_GEN, r3_buffer);
             }
             QUEUE_WAIT(queue);
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp
index 2acfd784c..3d3601553 100644
--- a/tests/unit_tests/rng/include/rng_test_common.hpp
+++ b/tests/unit_tests/rng/include/rng_test_common.hpp
@@ -91,7 +91,7 @@ class rng_test {
             }
         };
 
-#ifdef ONEMKL_ENABLE_CURAND_BACKEND // w/a for cuda backend hangs when there are several queues with different contexts
+#ifdef ONEMATH_ENABLE_CURAND_BACKEND // w/a for cuda backend hangs when there are several queues with different contexts
         static sycl::device* previous_device = nullptr;
         static sycl::context* context = nullptr;
 
diff --git a/tests/unit_tests/rng/include/skip_ahead_test.hpp b/tests/unit_tests/rng/include/skip_ahead_test.hpp
index efec71dde..ecb65b92a 100644
--- a/tests/unit_tests/rng/include/skip_ahead_test.hpp
+++ b/tests/unit_tests/rng/include/skip_ahead_test.hpp
@@ -30,7 +30,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "rng_test_common.hpp"
 
@@ -48,12 +48,12 @@ class skip_ahead_test {
             Engine engine(queue);
             std::vector<Engine*> engines;
 
-            oneapi::mkl::rng::bits<std::uint32_t> distr;
+            oneapi::math::rng::bits<std::uint32_t> distr;
 
             // Perform skip
             for (int i = 0; i < N_ENGINES; i++) {
                 engines.push_back(new Engine(queue));
-                oneapi::mkl::rng::skip_ahead(*(engines[i]), i * N_PORTION);
+                oneapi::math::rng::skip_ahead(*(engines[i]), i * N_PORTION);
             }
 
             sycl::buffer<std::uint32_t, 1> r_buffer(r1.data(), r1.size());
@@ -63,9 +63,9 @@ class skip_ahead_test {
                     sycl::buffer<std::uint32_t, 1>(r2.data() + i * N_PORTION, N_PORTION));
             }
 
-            oneapi::mkl::rng::generate(distr, engine, N_GEN_SERVICE, r_buffer);
+            oneapi::math::rng::generate(distr, engine, N_GEN_SERVICE, r_buffer);
             for (int i = 0; i < N_ENGINES; i++) {
-                oneapi::mkl::rng::generate(distr, *(engines[i]), N_PORTION, r_buffers[i]);
+                oneapi::math::rng::generate(distr, *(engines[i]), N_PORTION, r_buffers[i]);
             }
             QUEUE_WAIT(queue);
 
@@ -74,7 +74,7 @@ class skip_ahead_test {
                 delete engines[i];
             }
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -106,22 +106,22 @@ class skip_ahead_ex_test {
             Engine engine1(queue);
             Engine engine2(queue);
 
-            oneapi::mkl::rng::bits<std::uint32_t> distr;
+            oneapi::math::rng::bits<std::uint32_t> distr;
 
             // Perform skip
             for (int j = 0; j < SKIP_TIMES; j++) {
-                oneapi::mkl::rng::skip_ahead(engine1, N_SKIP);
+                oneapi::math::rng::skip_ahead(engine1, N_SKIP);
             }
-            oneapi::mkl::rng::skip_ahead(engine2, NUM_TO_SKIP);
+            oneapi::math::rng::skip_ahead(engine2, NUM_TO_SKIP);
 
             sycl::buffer<std::uint32_t, 1> r1_buffer(r1.data(), r1.size());
             sycl::buffer<std::uint32_t, 1> r2_buffer(r2.data(), r2.size());
 
-            oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer);
-            oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer);
+            oneapi::math::rng::generate(distr, engine1, N_GEN, r1_buffer);
+            oneapi::math::rng::generate(distr, engine2, N_GEN, r2_buffer);
             QUEUE_WAIT(queue);
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
diff --git a/tests/unit_tests/rng/include/statistics_check.hpp b/tests/unit_tests/rng/include/statistics_check.hpp
index 8a1d045f0..cb753460a 100644
--- a/tests/unit_tests/rng/include/statistics_check.hpp
+++ b/tests/unit_tests/rng/include/statistics_check.hpp
@@ -26,7 +26,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "rng_test_common.hpp"
 
@@ -68,10 +68,10 @@ template <typename Distribution>
 struct statistics {};
 
 template <typename Type, typename Method>
-struct statistics<oneapi::mkl::rng::uniform<Type, Method>> {
+struct statistics<oneapi::math::rng::uniform<Type, Method>> {
     template <typename AllocType>
     bool check(std::vector<Type, AllocType>& r,
-               const oneapi::mkl::rng::uniform<Type, Method>& distr) {
+               const oneapi::math::rng::uniform<Type, Method>& distr) {
         double tM, tD, tQ;
         Type a = distr.a();
         Type b = distr.b();
@@ -86,10 +86,10 @@ struct statistics<oneapi::mkl::rng::uniform<Type, Method>> {
 };
 
 template <typename Method>
-struct statistics<oneapi::mkl::rng::uniform<std::int32_t, Method>> {
+struct statistics<oneapi::math::rng::uniform<std::int32_t, Method>> {
     template <typename AllocType>
     bool check(std::vector<int32_t, AllocType>& r,
-               const oneapi::mkl::rng::uniform<int32_t, Method>& distr) {
+               const oneapi::math::rng::uniform<int32_t, Method>& distr) {
         double tM, tD, tQ;
         int32_t a = distr.a();
         int32_t b = distr.b();
@@ -105,10 +105,10 @@ struct statistics<oneapi::mkl::rng::uniform<std::int32_t, Method>> {
 };
 
 template <typename Type, typename Method>
-struct statistics<oneapi::mkl::rng::gaussian<Type, Method>> {
+struct statistics<oneapi::math::rng::gaussian<Type, Method>> {
     template <typename AllocType>
     bool check(std::vector<Type, AllocType>& r,
-               const oneapi::mkl::rng::gaussian<Type, Method>& distr) {
+               const oneapi::math::rng::gaussian<Type, Method>& distr) {
         double tM, tD, tQ;
         Type a = distr.mean();
         Type sigma = distr.stddev();
@@ -123,10 +123,10 @@ struct statistics<oneapi::mkl::rng::gaussian<Type, Method>> {
 };
 
 template <typename Type, typename Method>
-struct statistics<oneapi::mkl::rng::lognormal<Type, Method>> {
+struct statistics<oneapi::math::rng::lognormal<Type, Method>> {
     template <typename AllocType>
     bool check(std::vector<Type, AllocType>& r,
-               const oneapi::mkl::rng::lognormal<Type, Method>& distr) {
+               const oneapi::math::rng::lognormal<Type, Method>& distr) {
         double tM, tD, tQ;
         Type a = distr.m();
         Type b = distr.displ();
@@ -145,10 +145,10 @@ struct statistics<oneapi::mkl::rng::lognormal<Type, Method>> {
 };
 
 template <typename Type, typename Method>
-struct statistics<oneapi::mkl::rng::bernoulli<Type, Method>> {
+struct statistics<oneapi::math::rng::bernoulli<Type, Method>> {
     template <typename AllocType>
     bool check(std::vector<Type, AllocType>& r,
-               const oneapi::mkl::rng::bernoulli<Type, Method>& distr) {
+               const oneapi::math::rng::bernoulli<Type, Method>& distr) {
         double tM, tD, tQ;
         double p = distr.p();
 
@@ -161,10 +161,10 @@ struct statistics<oneapi::mkl::rng::bernoulli<Type, Method>> {
 };
 
 template <typename Type, typename Method>
-struct statistics<oneapi::mkl::rng::poisson<Type, Method>> {
+struct statistics<oneapi::math::rng::poisson<Type, Method>> {
     template <typename AllocType>
     bool check(std::vector<Type, AllocType>& r,
-               const oneapi::mkl::rng::poisson<Type, Method>& distr) {
+               const oneapi::math::rng::poisson<Type, Method>& distr) {
         double tM, tD, tQ;
         double lambda = distr.lambda();
 
diff --git a/tests/unit_tests/rng/include/statistics_check_test.hpp b/tests/unit_tests/rng/include/statistics_check_test.hpp
index 14a637d7a..87493e09a 100644
--- a/tests/unit_tests/rng/include/statistics_check_test.hpp
+++ b/tests/unit_tests/rng/include/statistics_check_test.hpp
@@ -30,7 +30,7 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "statistics_check.hpp"
 
@@ -62,7 +62,7 @@ class statistics_test {
 
             Engine engine(queue, SEED);
             Distr distr(args...);
-            oneapi::mkl::rng::generate(distr, engine, n_gen, r_buffer);
+            oneapi::math::rng::generate(distr, engine, n_gen, r_buffer);
             QUEUE_WAIT(queue);
         }
         catch (sycl::exception const& e) {
@@ -70,7 +70,7 @@ class statistics_test {
                       << e.what() << std::endl;
             print_error_code(e);
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
@@ -101,7 +101,7 @@ class statistics_usm_test {
         try {
             Engine engine(queue, SEED);
             Distr distr(args...);
-            auto event = oneapi::mkl::rng::generate(distr, engine, n_gen, r.data());
+            auto event = oneapi::math::rng::generate(distr, engine, n_gen, r.data());
             event.wait_and_throw();
         }
         catch (sycl::exception const& e) {
@@ -109,7 +109,7 @@ class statistics_usm_test {
                       << e.what() << std::endl;
             print_error_code(e);
         }
-        catch (const oneapi::mkl::unimplemented& e) {
+        catch (const oneapi::math::unimplemented& e) {
             status = test_skipped;
             return;
         }
diff --git a/tests/unit_tests/rng/service/CMakeLists.txt b/tests/unit_tests/rng/service/CMakeLists.txt
index 8436ce9eb..f00f7ffaa 100644
--- a/tests/unit_tests/rng/service/CMakeLists.txt
+++ b/tests/unit_tests/rng/service/CMakeLists.txt
@@ -33,7 +33,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET rng_service_rt SOURCES ${SERVICE_TESTS_SOURCES})
   else()
-    target_link_libraries(rng_service_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(rng_service_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -49,5 +49,5 @@ target_include_directories(rng_service_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET rng_service_ct SOURCES ${SERVICE_TESTS_SOURCES})
 else()
-  target_link_libraries(rng_service_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(rng_service_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
diff --git a/tests/unit_tests/rng/service/engines_api_test.cpp b/tests/unit_tests/rng/service/engines_api_test.cpp
index 500231703..9c17da89a 100644
--- a/tests/unit_tests/rng/service/engines_api_test.cpp
+++ b/tests/unit_tests/rng/service/engines_api_test.cpp
@@ -30,13 +30,13 @@ class Philox4x32x10ConstructorsTests : public ::testing::TestWithParam<sycl::dev
 class Philox4x32x10CopyTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10ConstructorsTests, BinaryPrecision) {
-    rng_test<engines_constructors_test<oneapi::mkl::rng::philox4x32x10>> test;
+    rng_test<engines_constructors_test<oneapi::math::rng::philox4x32x10>> test;
     std::initializer_list<std::uint64_t> seed_ex = { SEED, 0, 0 };
     EXPECT_TRUEORSKIP((test(GetParam(), seed_ex)));
 }
 
 TEST_P(Philox4x32x10CopyTests, BinaryPrecision) {
-    rng_test<engines_copy_test<oneapi::mkl::rng::philox4x32x10>> test;
+    rng_test<engines_copy_test<oneapi::math::rng::philox4x32x10>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
@@ -51,13 +51,13 @@ class Mrg32k3aConstructorsTests : public ::testing::TestWithParam<sycl::device*>
 class Mrg32k3aCopyTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mrg32k3aConstructorsTests, BinaryPrecision) {
-    rng_test<engines_constructors_test<oneapi::mkl::rng::mrg32k3a>> test;
+    rng_test<engines_constructors_test<oneapi::math::rng::mrg32k3a>> test;
     std::initializer_list<std::uint32_t> seed_ex = { SEED, 1, 1, 1, 1, 1 };
     EXPECT_TRUEORSKIP((test(GetParam(), seed_ex)));
 }
 
 TEST_P(Mrg32k3aCopyTests, BinaryPrecision) {
-    rng_test<engines_copy_test<oneapi::mkl::rng::mrg32k3a>> test;
+    rng_test<engines_copy_test<oneapi::math::rng::mrg32k3a>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
diff --git a/tests/unit_tests/rng/service/skip_ahead.cpp b/tests/unit_tests/rng/service/skip_ahead.cpp
index 445b76abe..abd3d7ffd 100644
--- a/tests/unit_tests/rng/service/skip_ahead.cpp
+++ b/tests/unit_tests/rng/service/skip_ahead.cpp
@@ -30,12 +30,12 @@ class Philox4x32x10SkipAheadTests : public ::testing::TestWithParam<sycl::device
 class Philox4x32x10SkipAheadExTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Philox4x32x10SkipAheadTests, BinaryPrecision) {
-    rng_test<skip_ahead_test<oneapi::mkl::rng::philox4x32x10>> test;
+    rng_test<skip_ahead_test<oneapi::math::rng::philox4x32x10>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
 TEST_P(Philox4x32x10SkipAheadExTests, BinaryPrecision) {
-    rng_test<skip_ahead_ex_test<oneapi::mkl::rng::philox4x32x10>> test;
+    rng_test<skip_ahead_ex_test<oneapi::math::rng::philox4x32x10>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
@@ -50,12 +50,12 @@ class Mrg32k3aSkipAheadTests : public ::testing::TestWithParam<sycl::device*> {}
 class Mrg32k3aSkipAheadExTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(Mrg32k3aSkipAheadTests, BinaryPrecision) {
-    rng_test<skip_ahead_test<oneapi::mkl::rng::mrg32k3a>> test;
+    rng_test<skip_ahead_test<oneapi::math::rng::mrg32k3a>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
 TEST_P(Mrg32k3aSkipAheadExTests, BinaryPrecision) {
-    rng_test<skip_ahead_ex_test<oneapi::mkl::rng::mrg32k3a>> test;
+    rng_test<skip_ahead_ex_test<oneapi::math::rng::mrg32k3a>> test;
     EXPECT_TRUEORSKIP((test(GetParam())));
 }
 
diff --git a/tests/unit_tests/rng/statistics_check/CMakeLists.txt b/tests/unit_tests/rng/statistics_check/CMakeLists.txt
index 244d33976..90ef555a5 100644
--- a/tests/unit_tests/rng/statistics_check/CMakeLists.txt
+++ b/tests/unit_tests/rng/statistics_check/CMakeLists.txt
@@ -33,7 +33,7 @@ if(BUILD_SHARED_LIBS)
   if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET rng_statistics_rt SOURCES ${STATS_CHECK_SOURCES})
   else()
-    target_link_libraries(rng_statistics_rt PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(rng_statistics_rt PUBLIC ONEMATH::SYCL::SYCL)
   endif()
 endif()
 
@@ -49,5 +49,5 @@ target_include_directories(rng_statistics_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
   add_sycl_to_target(TARGET rng_statistics_ct SOURCES ${STATS_CHECK_SOURCES})
 else()
-  target_link_libraries(rng_statistics_ct PUBLIC ONEMKL::SYCL::SYCL)
+  target_link_libraries(rng_statistics_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif()
diff --git a/tests/unit_tests/rng/statistics_check/bernoulli.cpp b/tests/unit_tests/rng/statistics_check/bernoulli.cpp
index b95d98118..87166dbf3 100755
--- a/tests/unit_tests/rng/statistics_check/bernoulli.cpp
+++ b/tests/unit_tests/rng/statistics_check/bernoulli.cpp
@@ -29,26 +29,26 @@ class BernoulliIcdfTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(BernoulliIcdfTests, IntegerPrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::bernoulli<std::int32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::bernoulli<std::int32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, BERNOULLI_ARGS)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::bernoulli<std::int32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::bernoulli<std::int32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS)));
 }
 
 TEST_P(BernoulliIcdfTests, UnsignedIntegerPrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::bernoulli<std::uint32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::bernoulli<std::uint32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, BERNOULLI_ARGS)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::bernoulli<std::uint32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::bernoulli<std::uint32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp b/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp
index 9c8c934dd..ad0664500 100755
--- a/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp
+++ b/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp
@@ -29,26 +29,26 @@ class BernoulliIcdfUsmTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(BernoulliIcdfUsmTests, IntegerPrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::bernoulli<std::int32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::bernoulli<std::int32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, BERNOULLI_ARGS)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::bernoulli<std::int32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::bernoulli<std::int32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS)));
 }
 
 TEST_P(BernoulliIcdfUsmTests, UnsignedIntegerPrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::bernoulli<std::uint32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::bernoulli<std::uint32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, BERNOULLI_ARGS)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::bernoulli<std::int32_t, oneapi::mkl::rng::bernoulli_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::bernoulli<std::uint32_t, oneapi::math::rng::bernoulli_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/gaussian.cpp b/tests/unit_tests/rng/statistics_check/gaussian.cpp
index ed63f3221..8f33db53d 100644
--- a/tests/unit_tests/rng/statistics_check/gaussian.cpp
+++ b/tests/unit_tests/rng/statistics_check/gaussian.cpp
@@ -30,14 +30,14 @@ class GaussianBoxmullerTest : public ::testing::TestWithParam<sycl::device*> {};
 class GaussianIcdfTest : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(GaussianIcdfTest, RealSinglePrecision) {
-    rng_test<
-        statistics_test<oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>,
-                        oneapi::mkl::rng::philox4x32x10>>
+    rng_test<statistics_test<
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
-    rng_test<
-        statistics_test<oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>,
-                        oneapi::mkl::rng::mrg32k3a>>
+    rng_test<statistics_test<
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
 }
@@ -45,27 +45,27 @@ TEST_P(GaussianIcdfTest, RealSinglePrecision) {
 TEST_P(GaussianIcdfTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
-    rng_test<
-        statistics_test<oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>,
-                        oneapi::mkl::rng::philox4x32x10>>
+    rng_test<statistics_test<
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
-    rng_test<
-        statistics_test<oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>,
-                        oneapi::mkl::rng::mrg32k3a>>
+    rng_test<statistics_test<
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
 }
 
 TEST_P(GaussianBoxmullerTest, RealSinglePrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
 }
@@ -74,13 +74,13 @@ TEST_P(GaussianBoxmullerTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp b/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp
index a1d4d1b06..d387bbc6d 100644
--- a/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp
+++ b/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp
@@ -31,13 +31,13 @@ class GaussianIcdfUsmTest : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(GaussianIcdfUsmTest, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
 }
@@ -46,26 +46,26 @@ TEST_P(GaussianIcdfUsmTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
 }
 
 TEST_P(GaussianBoxmullerUsmTest, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<float, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<float, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT)));
 }
@@ -74,13 +74,13 @@ TEST_P(GaussianBoxmullerUsmTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::gaussian<double, oneapi::mkl::rng::gaussian_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::gaussian<double, oneapi::math::rng::gaussian_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/lognormal.cpp b/tests/unit_tests/rng/statistics_check/lognormal.cpp
index 5486202bb..9ba225e20 100755
--- a/tests/unit_tests/rng/statistics_check/lognormal.cpp
+++ b/tests/unit_tests/rng/statistics_check/lognormal.cpp
@@ -31,13 +31,13 @@ class LognormalIcdfTest : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(LognormalIcdfTest, RealSinglePrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
 }
@@ -46,26 +46,26 @@ TEST_P(LognormalIcdfTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
 }
 
 TEST_P(LognormalBoxmullerTest, RealSinglePrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
 }
@@ -74,13 +74,13 @@ TEST_P(LognormalBoxmullerTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp b/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp
index d59d9458a..ad9dd48d7 100755
--- a/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp
+++ b/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp
@@ -31,13 +31,13 @@ class LognormalIcdfUsmTest : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(LognormalIcdfUsmTest, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
 }
@@ -46,26 +46,26 @@ TEST_P(LognormalIcdfUsmTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::icdf>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::icdf>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
 }
 
 TEST_P(LognormalBoxmullerUsmTest, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<float, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<float, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT)));
 }
@@ -74,13 +74,13 @@ TEST_P(LognormalBoxmullerUsmTest, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::lognormal<double, oneapi::mkl::rng::lognormal_method::box_muller2>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::lognormal<double, oneapi::math::rng::lognormal_method::box_muller2>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/poisson.cpp b/tests/unit_tests/rng/statistics_check/poisson.cpp
index d39842e9f..238a3cb6e 100755
--- a/tests/unit_tests/rng/statistics_check/poisson.cpp
+++ b/tests/unit_tests/rng/statistics_check/poisson.cpp
@@ -29,30 +29,30 @@ class PoissonIcdfTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(PoissonIcdfTests, IntegerPrecision) {
     rng_test<
-        statistics_test<oneapi::mkl::rng::poisson<
-                            std::int32_t, oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-                        oneapi::mkl::rng::philox4x32x10>>
+        statistics_test<oneapi::math::rng::poisson<
+                            std::int32_t, oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+                        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS)));
     rng_test<
-        statistics_test<oneapi::mkl::rng::poisson<
-                            std::int32_t, oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-                        oneapi::mkl::rng::mrg32k3a>>
+        statistics_test<oneapi::math::rng::poisson<
+                            std::int32_t, oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+                        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS)));
 }
 
 TEST_P(PoissonIcdfTests, UnsignedIntegerPrecision) {
     rng_test<
-        statistics_test<oneapi::mkl::rng::poisson<
-                            std::uint32_t, oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-                        oneapi::mkl::rng::philox4x32x10>>
+        statistics_test<oneapi::math::rng::poisson<
+                            std::uint32_t, oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+                        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS)));
     rng_test<
-        statistics_test<oneapi::mkl::rng::poisson<
-                            std::int32_t, oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-                        oneapi::mkl::rng::mrg32k3a>>
+        statistics_test<oneapi::math::rng::poisson<
+                            std::uint32_t, oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+                        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp
old mode 100755
new mode 100644
index 052eff5a3..6927669b0
--- a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp
+++ b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp
@@ -29,30 +29,30 @@ class PoissonIcdfUsmTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(PoissonIcdfUsmTests, IntegerPrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::poisson<std::int32_t,
-                                  oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::poisson<std::int32_t,
+                                   oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::poisson<std::int32_t,
-                                  oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::poisson<std::int32_t,
+                                   oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS)));
 }
 
 TEST_P(PoissonIcdfUsmTests, UnsignedIntegerPrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::poisson<std::uint32_t,
-                                  oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::poisson<std::uint32_t,
+                                   oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::poisson<std::int32_t,
-                                  oneapi::mkl::rng::poisson_method::gaussian_icdf_based>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::poisson<std::int32_t,
+                                   oneapi::math::rng::poisson_method::gaussian_icdf_based>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/uniform.cpp b/tests/unit_tests/rng/statistics_check/uniform.cpp
index eb11714e1..d21644708 100644
--- a/tests/unit_tests/rng/statistics_check/uniform.cpp
+++ b/tests/unit_tests/rng/statistics_check/uniform.cpp
@@ -31,13 +31,13 @@ class UniformAccurateTests : public ::testing::TestWithParam<sycl::device*> {};
 
 TEST_P(UniformStdTests, RealSinglePrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
 }
@@ -46,39 +46,39 @@ TEST_P(UniformStdTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
 }
 
 TEST_P(UniformStdTests, IntegerPrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_INT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_INT)));
 }
 
 TEST_P(UniformAccurateTests, RealSinglePrecision) {
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
 }
@@ -87,13 +87,13 @@ TEST_P(UniformAccurateTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
     rng_test<statistics_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/rng/statistics_check/uniform_usm.cpp b/tests/unit_tests/rng/statistics_check/uniform_usm.cpp
index df4f7a764..d406d0a59 100644
--- a/tests/unit_tests/rng/statistics_check/uniform_usm.cpp
+++ b/tests/unit_tests/rng/statistics_check/uniform_usm.cpp
@@ -31,13 +31,13 @@ class UniformAccurateUsmTests : public ::testing::TestWithParam<sycl::device*> {
 
 TEST_P(UniformStdUsmTests, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
 }
@@ -46,39 +46,39 @@ TEST_P(UniformStdUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
 }
 
 TEST_P(UniformStdUsmTests, IntegerPrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_INT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<std::int32_t, oneapi::mkl::rng::uniform_method::standard>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<std::int32_t, oneapi::math::rng::uniform_method::standard>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_INT)));
 }
 
 TEST_P(UniformAccurateUsmTests, RealSinglePrecision) {
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<float, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<float, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT)));
 }
@@ -87,13 +87,13 @@ TEST_P(UniformAccurateUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
 
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::philox4x32x10>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::philox4x32x10>>
         test1;
     EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
     rng_test<statistics_usm_test<
-        oneapi::mkl::rng::uniform<double, oneapi::mkl::rng::uniform_method::accurate>,
-        oneapi::mkl::rng::mrg32k3a>>
+        oneapi::math::rng::uniform<double, oneapi::math::rng::uniform_method::accurate>,
+        oneapi::math::rng::mrg32k3a>>
         test2;
     EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE)));
 }
diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp
index 675d8930a..584de713d 100644
--- a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp
+++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp
@@ -24,7 +24,7 @@
 #include <string>
 #include <tuple>
 
-#include "oneapi/mkl.hpp"
+#include "oneapi/math.hpp"
 
 #include "test_common.hpp"
 
@@ -55,10 +55,10 @@ inline T opVal(const T t, const bool isConj) {
 };
 
 template <typename fpType, typename intType, typename accIntType, typename accFpType>
-void do_csr_transpose(const oneapi::mkl::transpose opA, intType* ia_t, intType* ja_t, fpType* a_t,
+void do_csr_transpose(const oneapi::math::transpose opA, intType* ia_t, intType* ja_t, fpType* a_t,
                       intType a_nrows, intType a_ncols, intType indexing, accIntType& ia,
                       accIntType& ja, accFpType& a, const bool structOnlyFlag = false) {
-    const bool isConj = (opA == oneapi::mkl::transpose::conjtrans);
+    const bool isConj = (opA == oneapi::math::transpose::conjtrans);
 
     // initialize ia_t to zero
     for (intType i = 0; i < a_ncols + 1; ++i) {
@@ -107,17 +107,17 @@ void do_csr_transpose(const oneapi::mkl::transpose opA, intType* ia_t, intType*
 template <typename fpType, typename intType>
 auto sparse_transpose_if_needed(const intType* ia, const intType* ja, const fpType* a,
                                 intType a_nrows, intType a_ncols, std::size_t nnz, intType indexing,
-                                oneapi::mkl::transpose transpose_val) {
+                                oneapi::math::transpose transpose_val) {
     std::vector<intType> iopa;
     std::vector<intType> jopa;
     std::vector<fpType> opa;
-    if (transpose_val == oneapi::mkl::transpose::nontrans) {
+    if (transpose_val == oneapi::math::transpose::nontrans) {
         iopa.assign(ia, ia + a_nrows + 1);
         jopa.assign(ja, ja + nnz);
         opa.assign(a, a + nnz);
     }
-    else if (transpose_val == oneapi::mkl::transpose::trans ||
-             transpose_val == oneapi::mkl::transpose::conjtrans) {
+    else if (transpose_val == oneapi::math::transpose::trans ||
+             transpose_val == oneapi::math::transpose::conjtrans) {
         iopa.resize(static_cast<std::size_t>(a_ncols + 1));
         jopa.resize(nnz);
         opa.resize(nnz);
@@ -135,11 +135,11 @@ auto sparse_transpose_if_needed(const intType* ia, const intType* ja, const fpTy
 /// The outputted matrix always uses row major layout
 template <typename fpType>
 auto extract_dense_matrix(const fpType* x, std::size_t nrows, std::size_t ncols, std::size_t ld,
-                          oneapi::mkl::transpose transpose_val,
-                          oneapi::mkl::layout dense_matrix_layout) {
-    const bool is_row_major = dense_matrix_layout == oneapi::mkl::layout::row_major;
-    const bool is_transposed = transpose_val != oneapi::mkl::transpose::nontrans;
-    const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans;
+                          oneapi::math::transpose transpose_val,
+                          oneapi::math::layout dense_matrix_layout) {
+    const bool is_row_major = dense_matrix_layout == oneapi::math::layout::row_major;
+    const bool is_transposed = transpose_val != oneapi::math::transpose::nontrans;
+    const bool apply_conjugate = transpose_val == oneapi::math::transpose::conjtrans;
     const bool swap_ld = is_row_major != is_transposed;
     if (swap_ld && ncols > ld) {
         throw std::runtime_error("Expected ncols <= ld");
@@ -164,31 +164,31 @@ template <typename fpType, typename intType>
 std::vector<fpType> sparse_to_dense(sparse_matrix_format_t format, const intType* ia,
                                     const intType* ja, const fpType* a, std::size_t a_nrows,
                                     std::size_t a_ncols, std::size_t nnz, intType indexing,
-                                    oneapi::mkl::transpose transpose_val,
-                                    oneapi::mkl::sparse::matrix_view A_view) {
-    oneapi::mkl::sparse::matrix_descr type_view = A_view.type_view;
-    oneapi::mkl::uplo uplo_val = A_view.uplo_view;
+                                    oneapi::math::transpose transpose_val,
+                                    oneapi::math::sparse::matrix_view A_view) {
+    oneapi::math::sparse::matrix_descr type_view = A_view.type_view;
+    oneapi::math::uplo uplo_val = A_view.uplo_view;
     const bool is_symmetric_or_hermitian_view =
-        type_view == oneapi::mkl::sparse::matrix_descr::symmetric ||
-        type_view == oneapi::mkl::sparse::matrix_descr::hermitian;
-    const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans;
+        type_view == oneapi::math::sparse::matrix_descr::symmetric ||
+        type_view == oneapi::math::sparse::matrix_descr::hermitian;
+    const bool apply_conjugate = transpose_val == oneapi::math::transpose::conjtrans;
     std::vector<fpType> dense_a(a_nrows * a_ncols, fpType(0));
 
     auto write_to_dense_if_needed = [&](std::size_t a_idx, std::size_t row, std::size_t col) {
-        if ((type_view == oneapi::mkl::sparse::matrix_descr::triangular ||
+        if ((type_view == oneapi::math::sparse::matrix_descr::triangular ||
              is_symmetric_or_hermitian_view) &&
-            ((uplo_val == oneapi::mkl::uplo::lower && col > row) ||
-             (uplo_val == oneapi::mkl::uplo::upper && col < row))) {
+            ((uplo_val == oneapi::math::uplo::lower && col > row) ||
+             (uplo_val == oneapi::math::uplo::upper && col < row))) {
             // Read only the upper or lower part of the sparse matrix
             return;
         }
-        if (type_view == oneapi::mkl::sparse::matrix_descr::diagonal && col != row) {
+        if (type_view == oneapi::math::sparse::matrix_descr::diagonal && col != row) {
             // Read only the diagonal
             return;
         }
         // Do not transpose symmetric matrices to simplify the propagation of the symmetric values
         std::size_t dense_a_idx =
-            (!is_symmetric_or_hermitian_view && transpose_val != oneapi::mkl::transpose::nontrans)
+            (!is_symmetric_or_hermitian_view && transpose_val != oneapi::math::transpose::nontrans)
                 ? col * a_nrows + row
                 : row * a_ncols + col;
         fpType val = opVal(a[a_idx], apply_conjugate);
@@ -213,7 +213,7 @@ std::vector<fpType> sparse_to_dense(sparse_matrix_format_t format, const intType
     }
 
     // Write unit diagonal
-    if (A_view.diag_view == oneapi::mkl::diag::unit && a_nrows == a_ncols) {
+    if (A_view.diag_view == oneapi::math::diag::unit && a_nrows == a_ncols) {
         for (std::size_t i = 0; i < a_nrows; i++) {
             dense_a[i * a_nrows + i] = fpType(1);
         }
@@ -223,7 +223,7 @@ std::vector<fpType> sparse_to_dense(sparse_matrix_format_t format, const intType
     if (is_symmetric_or_hermitian_view) {
         for (std::size_t i = 0; i < a_nrows; ++i) {
             for (std::size_t j = i + 1; j < a_ncols; ++j) {
-                if (uplo_val == oneapi::mkl::uplo::lower) {
+                if (uplo_val == oneapi::math::uplo::lower) {
                     dense_a[i * a_ncols + j] = dense_a[j * a_nrows + i];
                 }
                 else {
diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp
index 6637e0daa..6a577a389 100644
--- a/tests/unit_tests/sparse_blas/include/test_common.hpp
+++ b/tests/unit_tests/sparse_blas/include/test_common.hpp
@@ -59,36 +59,37 @@ enum sparse_matrix_format_t {
     COO,
 };
 
-inline std::set<oneapi::mkl::sparse::matrix_property> get_default_matrix_properties(
+inline std::set<oneapi::math::sparse::matrix_property> get_default_matrix_properties(
     sycl::queue queue, sparse_matrix_format_t format) {
-    auto vendor_id = oneapi::mkl::get_device_id(queue);
-    if (vendor_id == oneapi::mkl::device::nvidiagpu && format == sparse_matrix_format_t::COO) {
-        return { oneapi::mkl::sparse::matrix_property::sorted_by_rows };
+    auto vendor_id = oneapi::math::get_device_id(queue);
+    if (vendor_id == oneapi::math::device::nvidiagpu && format == sparse_matrix_format_t::COO) {
+        return { oneapi::math::sparse::matrix_property::sorted_by_rows };
     }
     return {};
 }
 
 /// Return the combinations of matrix_properties to test other than the default
-inline std::vector<std::set<oneapi::mkl::sparse::matrix_property>>
+inline std::vector<std::set<oneapi::math::sparse::matrix_property>>
 get_all_matrix_properties_combinations(sycl::queue queue, sparse_matrix_format_t format) {
-    auto vendor_id = oneapi::mkl::get_device_id(queue);
-    if (vendor_id == oneapi::mkl::device::nvidiagpu && format == sparse_matrix_format_t::COO) {
+    auto vendor_id = oneapi::math::get_device_id(queue);
+    if (vendor_id == oneapi::math::device::nvidiagpu && format == sparse_matrix_format_t::COO) {
         // Ensure all the sets have the sorted or sorted_by_rows properties
-        return { { oneapi::mkl::sparse::matrix_property::sorted },
-                 { oneapi::mkl::sparse::matrix_property::sorted_by_rows,
-                   oneapi::mkl::sparse::matrix_property::symmetric },
-                 { oneapi::mkl::sparse::matrix_property::sorted,
-                   oneapi::mkl::sparse::matrix_property::symmetric } };
-    }
-
-    std::vector<std::set<oneapi::mkl::sparse::matrix_property>> properties_combinations{
-        { oneapi::mkl::sparse::matrix_property::sorted },
-        { oneapi::mkl::sparse::matrix_property::symmetric },
-        { oneapi::mkl::sparse::matrix_property::sorted,
-          oneapi::mkl::sparse::matrix_property::symmetric }
+        return { { oneapi::math::sparse::matrix_property::sorted },
+                 { oneapi::math::sparse::matrix_property::sorted_by_rows,
+                   oneapi::math::sparse::matrix_property::symmetric },
+                 { oneapi::math::sparse::matrix_property::sorted,
+                   oneapi::math::sparse::matrix_property::symmetric } };
+    }
+
+    std::vector<std::set<oneapi::math::sparse::matrix_property>> properties_combinations{
+        { oneapi::math::sparse::matrix_property::sorted },
+        { oneapi::math::sparse::matrix_property::symmetric },
+        { oneapi::math::sparse::matrix_property::sorted,
+          oneapi::math::sparse::matrix_property::symmetric }
     };
     if (format == sparse_matrix_format_t::COO) {
-        properties_combinations.push_back({ oneapi::mkl::sparse::matrix_property::sorted_by_rows });
+        properties_combinations.push_back(
+            { oneapi::math::sparse::matrix_property::sorted_by_rows });
     }
     return properties_combinations;
 }
@@ -156,12 +157,12 @@ auto swap_if_cond(bool swap, T x, T y) {
 }
 
 template <typename OutT, typename XT, typename YT>
-auto swap_if_transposed(oneapi::mkl::transpose op, XT x, YT y) {
-    return swap_if_cond<OutT, XT, YT>(op != oneapi::mkl::transpose::nontrans, x, y);
+auto swap_if_transposed(oneapi::math::transpose op, XT x, YT y) {
+    return swap_if_cond<OutT, XT, YT>(op != oneapi::math::transpose::nontrans, x, y);
 }
 
 template <typename T>
-auto swap_if_transposed(oneapi::mkl::transpose op, T x, T y) {
+auto swap_if_transposed(oneapi::math::transpose op, T x, T y) {
     return swap_if_transposed<T, T, T>(op, x, y);
 }
 
@@ -205,13 +206,13 @@ void rand_vector(std::vector<fpType>& v, std::size_t n) {
 }
 
 template <typename fpType>
-void rand_matrix(std::vector<fpType>& m, oneapi::mkl::layout layout_val, std::size_t nrows,
+void rand_matrix(std::vector<fpType>& m, oneapi::math::layout layout_val, std::size_t nrows,
                  std::size_t ncols, std::size_t ld,
-                 oneapi::mkl::transpose transpose_val = oneapi::mkl::transpose::nontrans) {
+                 oneapi::math::transpose transpose_val = oneapi::math::transpose::nontrans) {
     using fpRealType = typename complex_info<fpType>::real_type;
     auto [op_nrows, op_cols] = swap_if_transposed(transpose_val, nrows, ncols);
     auto [outer_size, inner_size] =
-        swap_if_cond(layout_val == oneapi::mkl::layout::row_major, op_cols, op_nrows);
+        swap_if_cond(layout_val == oneapi::math::layout::row_major, op_cols, op_nrows);
     if (inner_size > ld) {
         throw std::runtime_error("Expected inner_size <= ld");
     }
@@ -366,15 +367,15 @@ intType generate_random_matrix(sparse_matrix_format_t format, const intType nrow
 template <typename fpType, typename intType>
 void shuffle_sparse_matrix_if_needed(
     sparse_matrix_format_t format,
-    const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties, intType indexing,
+    const std::set<oneapi::math::sparse::matrix_property>& matrix_properties, intType indexing,
     intType* ia, intType* ja, fpType* a, intType nnz, std::size_t nrows) {
-    const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) !=
+    const bool is_sorted = matrix_properties.find(oneapi::math::sparse::matrix_property::sorted) !=
                            matrix_properties.cend();
     if (is_sorted) {
         return;
     }
     const bool is_sorted_by_rows =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted_by_rows) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::sorted_by_rows) !=
         matrix_properties.cend();
     if (format == sparse_matrix_format_t::CSR) {
         for (std::size_t i = 0; i < nrows; ++i) {
@@ -419,48 +420,48 @@ void shuffle_sparse_matrix_if_needed(
         }
     }
     else {
-        throw oneapi::mkl::exception("sparse_blas", "shuffle_sparse_matrix_if_needed",
-                                     "Internal error: unsupported format");
+        throw oneapi::math::exception("sparse_blas", "shuffle_sparse_matrix_if_needed",
+                                      "Internal error: unsupported format");
     }
 }
 
 /// Initialize a sparse matrix specified by the given format
 template <typename ContainerValueT, typename ContainerIndexT>
 void init_sparse_matrix(sycl::queue& queue, sparse_matrix_format_t format,
-                        oneapi::mkl::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows,
-                        std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                        oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows,
+                        std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                         ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) {
     if (format == sparse_matrix_format_t::CSR) {
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_csr_matrix, queue, p_smhandle, num_rows, num_cols,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_csr_matrix, queue, p_smhandle, num_rows, num_cols,
                       nnz, index, rows, cols, vals);
     }
     else if (format == sparse_matrix_format_t::COO) {
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_coo_matrix, queue, p_smhandle, num_rows, num_cols,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_coo_matrix, queue, p_smhandle, num_rows, num_cols,
                       nnz, index, rows, cols, vals);
     }
     else {
-        throw oneapi::mkl::exception("sparse_blas", "init_sparse_matrix",
-                                     "Internal error: unsupported format");
+        throw oneapi::math::exception("sparse_blas", "init_sparse_matrix",
+                                      "Internal error: unsupported format");
     }
 }
 
 /// Reset the data of a sparse matrix specified by the given format
 template <typename ContainerValueT, typename ContainerIndexT>
 void set_matrix_data(sycl::queue& queue, sparse_matrix_format_t format,
-                     oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows,
-                     std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index,
+                     oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows,
+                     std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index,
                      ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) {
     if (format == sparse_matrix_format_t::CSR) {
-        CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_matrix_data, queue, smhandle, num_rows, num_cols,
-                      nnz, index, rows, cols, vals);
+        CALL_RT_OR_CT(oneapi::math::sparse::set_csr_matrix_data, queue, smhandle, num_rows,
+                      num_cols, nnz, index, rows, cols, vals);
     }
     else if (format == sparse_matrix_format_t::COO) {
-        CALL_RT_OR_CT(oneapi::mkl::sparse::set_coo_matrix_data, queue, smhandle, num_rows, num_cols,
-                      nnz, index, rows, cols, vals);
+        CALL_RT_OR_CT(oneapi::math::sparse::set_coo_matrix_data, queue, smhandle, num_rows,
+                      num_cols, nnz, index, rows, cols, vals);
     }
     else {
-        throw oneapi::mkl::exception("sparse_blas", "set_matrix_data",
-                                     "Internal error: unsupported format");
+        throw oneapi::math::exception("sparse_blas", "set_matrix_data",
+                                      "Internal error: unsupported format");
     }
 }
 
@@ -475,18 +476,18 @@ inline void free_handles(sycl::queue& queue, const std::vector<sycl::event> depe
             }
             sycl::event event;
             if constexpr (std::is_same_v<decltype(handles),
-                                         oneapi::mkl::sparse::dense_vector_handle_t>) {
-                CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_vector, queue, handles,
+                                         oneapi::math::sparse::dense_vector_handle_t>) {
+                CALL_RT_OR_CT(event = oneapi::math::sparse::release_dense_vector, queue, handles,
                               dependencies);
             }
             else if constexpr (std::is_same_v<decltype(handles),
-                                              oneapi::mkl::sparse::dense_matrix_handle_t>) {
-                CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_matrix, queue, handles,
+                                              oneapi::math::sparse::dense_matrix_handle_t>) {
+                CALL_RT_OR_CT(event = oneapi::math::sparse::release_dense_matrix, queue, handles,
                               dependencies);
             }
             else if constexpr (std::is_same_v<decltype(handles),
-                                              oneapi::mkl::sparse::matrix_handle_t>) {
-                CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_sparse_matrix, queue, handles,
+                                              oneapi::math::sparse::matrix_handle_t>) {
+                CALL_RT_OR_CT(event = oneapi::math::sparse::release_sparse_matrix, queue, handles,
                               dependencies);
             }
             event.wait();
@@ -506,12 +507,12 @@ inline void wait_and_free_handles(sycl::queue& queue, HandlesT&&... handles) {
 }
 
 inline bool require_square_matrix(
-    oneapi::mkl::sparse::matrix_view A_view,
-    const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties) {
+    oneapi::math::sparse::matrix_view A_view,
+    const std::set<oneapi::math::sparse::matrix_property>& matrix_properties) {
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
-    return A_view.type_view != oneapi::mkl::sparse::matrix_descr::general || is_symmetric;
+    return A_view.type_view != oneapi::math::sparse::matrix_descr::general || is_symmetric;
 }
 
 template <typename fpType>
diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp
index 153862f53..f5182043d 100644
--- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp
+++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp
@@ -26,8 +26,8 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 
 #include "common_sparse_reference.hpp"
 #include "test_common.hpp"
@@ -55,17 +55,17 @@ template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper_with_format_with_transpose(
     testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev,
     sparse_matrix_format_t format,
-    const std::vector<oneapi::mkl::sparse::spmm_alg>& non_default_algorithms,
-    oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, int& num_passed,
+    const std::vector<oneapi::math::sparse::spmm_alg>& non_default_algorithms,
+    oneapi::math::transpose transpose_A, oneapi::math::transpose transpose_B, int& num_passed,
     int& num_skipped) {
     sycl::property_list queue_properties;
     double density_A_matrix = 0.8;
     fpType fp_zero = set_fp_value<fpType>()(0.f, 0.f);
     fpType fp_one = set_fp_value<fpType>()(1.f, 0.f);
-    oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero;
-    oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major;
-    oneapi::mkl::sparse::spmm_alg default_alg = oneapi::mkl::sparse::spmm_alg::default_alg;
-    oneapi::mkl::sparse::matrix_view default_A_view;
+    oneapi::math::index_base index_zero = oneapi::math::index_base::zero;
+    oneapi::math::layout col_major = oneapi::math::layout::col_major;
+    oneapi::math::sparse::spmm_alg default_alg = oneapi::math::sparse::spmm_alg::default_alg;
+    oneapi::math::sparse::matrix_view default_A_view;
     bool no_reset_data = false;
     bool no_scalars_on_device = false;
 
@@ -75,10 +75,10 @@ void test_helper_with_format_with_transpose(
 
     {
         int m = 4, k = 6, n = 5;
-        int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m;
-        int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k;
-        int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? n : k;
-        int ncols_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? k : n;
+        int nrows_A = (transpose_A != oneapi::math::transpose::nontrans) ? k : m;
+        int ncols_A = (transpose_A != oneapi::math::transpose::nontrans) ? m : k;
+        int nrows_B = (transpose_B != oneapi::math::transpose::nontrans) ? n : k;
+        int ncols_B = (transpose_B != oneapi::math::transpose::nontrans) ? k : n;
         int nrows_C = m;
         int ncols_C = n;
         int ldb = nrows_B;
@@ -108,9 +108,10 @@ void test_helper_with_format_with_transpose(
         // Test index_base 1
         EXPECT_TRUE_OR_FUTURE_SKIP(
             test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, ncols_C,
-                             density_A_matrix, oneapi::mkl::index_base::one, col_major, transpose_A,
-                             transpose_B, fp_one, fp_zero, ldb, ldc, default_alg, default_A_view,
-                             default_properties, no_reset_data, no_scalars_on_device),
+                             density_A_matrix, oneapi::math::index_base::one, col_major,
+                             transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, default_alg,
+                             default_A_view, default_properties, no_reset_data,
+                             no_scalars_on_device),
             num_passed, num_skipped);
         // Test non-default alpha
         EXPECT_TRUE_OR_FUTURE_SKIP(
@@ -159,7 +160,7 @@ void test_helper_with_format_with_transpose(
         // Test row major layout
         EXPECT_TRUE_OR_FUTURE_SKIP(
             test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, ncols_C,
-                             density_A_matrix, index_zero, oneapi::mkl::layout::row_major,
+                             density_A_matrix, index_zero, oneapi::math::layout::row_major,
                              transpose_A, transpose_B, fp_one, fp_zero, ncols_B, ncols_C,
                              default_alg, default_A_view, default_properties, no_reset_data,
                              no_scalars_on_device),
@@ -203,9 +204,9 @@ void test_helper_with_format_with_transpose(
     {
         // Test different sizes
         int m = 6, k = 2, n = 5;
-        int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m;
-        int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k;
-        int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? n : k;
+        int nrows_A = (transpose_A != oneapi::math::transpose::nontrans) ? k : m;
+        int ncols_A = (transpose_A != oneapi::math::transpose::nontrans) ? m : k;
+        int nrows_B = (transpose_B != oneapi::math::transpose::nontrans) ? n : k;
         int nrows_C = m;
         int ncols_C = n;
         int ldb = nrows_B;
@@ -235,11 +236,11 @@ template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper_with_format(
     testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev,
     sparse_matrix_format_t format,
-    const std::vector<oneapi::mkl::sparse::spmm_alg>& non_default_algorithms, int& num_passed,
+    const std::vector<oneapi::math::sparse::spmm_alg>& non_default_algorithms, int& num_passed,
     int& num_skipped) {
-    std::vector<oneapi::mkl::transpose> transpose_vals{ oneapi::mkl::transpose::nontrans,
-                                                        oneapi::mkl::transpose::trans,
-                                                        oneapi::mkl::transpose::conjtrans };
+    std::vector<oneapi::math::transpose> transpose_vals{ oneapi::math::transpose::nontrans,
+                                                         oneapi::math::transpose::trans,
+                                                         oneapi::math::transpose::conjtrans };
     for (auto transpose_A : transpose_vals) {
         for (auto transpose_B : transpose_vals) {
             test_helper_with_format_with_transpose<fpType>(
@@ -264,14 +265,14 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6
                  sycl::device* dev, int& num_passed, int& num_skipped) {
     test_helper_with_format<fpType>(
         test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR,
-        { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::csr_alg1,
-          oneapi::mkl::sparse::spmm_alg::csr_alg2, oneapi::mkl::sparse::spmm_alg::csr_alg3 },
+        { oneapi::math::sparse::spmm_alg::no_optimize_alg, oneapi::math::sparse::spmm_alg::csr_alg1,
+          oneapi::math::sparse::spmm_alg::csr_alg2, oneapi::math::sparse::spmm_alg::csr_alg3 },
         num_passed, num_skipped);
     test_helper_with_format<fpType>(
         test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO,
-        { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::coo_alg1,
-          oneapi::mkl::sparse::spmm_alg::coo_alg2, oneapi::mkl::sparse::spmm_alg::coo_alg3,
-          oneapi::mkl::sparse::spmm_alg::coo_alg4 },
+        { oneapi::math::sparse::spmm_alg::no_optimize_alg, oneapi::math::sparse::spmm_alg::coo_alg1,
+          oneapi::math::sparse::spmm_alg::coo_alg2, oneapi::math::sparse::spmm_alg::coo_alg3,
+          oneapi::math::sparse::spmm_alg::coo_alg4 },
         num_passed, num_skipped);
 }
 
@@ -280,10 +281,10 @@ template <typename fpType, typename intType>
 void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType* ia,
                                  const intType* ja, const fpType* a, intType a_nrows,
                                  intType a_ncols, intType c_ncols, intType a_nnz, intType indexing,
-                                 oneapi::mkl::layout dense_matrix_layout,
-                                 oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
+                                 oneapi::math::layout dense_matrix_layout,
+                                 oneapi::math::transpose opA, oneapi::math::transpose opB,
                                  fpType alpha, fpType beta, intType ldb, intType ldc,
-                                 const fpType* b, oneapi::mkl::sparse::matrix_view A_view,
+                                 const fpType* b, oneapi::math::sparse::matrix_view A_view,
                                  fpType* c_ref) {
     std::size_t a_nrows_u = static_cast<std::size_t>(a_nrows);
     std::size_t a_ncols_u = static_cast<std::size_t>(a_ncols);
@@ -301,8 +302,8 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType* i
 
     // Return the linear index to access a dense matrix from
     auto dense_linear_idx = [=](std::size_t row, std::size_t col, std::size_t ld) {
-        return (dense_matrix_layout == oneapi::mkl::layout::row_major) ? row * ld + col
-                                                                       : col * ld + row;
+        return (dense_matrix_layout == oneapi::math::layout::row_major) ? row * ld + col
+                                                                        : col * ld + row;
     };
 
     //
diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp
index 50b5aa7db..b191f20fb 100644
--- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp
+++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp
@@ -26,8 +26,8 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 
 #include "common_sparse_reference.hpp"
 #include "test_common.hpp"
@@ -54,16 +54,16 @@ template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper_with_format_with_transpose(
     testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev,
     sparse_matrix_format_t format,
-    const std::vector<oneapi::mkl::sparse::spmv_alg>& non_default_algorithms,
-    oneapi::mkl::transpose transpose_val, int& num_passed, int& num_skipped) {
+    const std::vector<oneapi::math::sparse::spmv_alg>& non_default_algorithms,
+    oneapi::math::transpose transpose_val, int& num_passed, int& num_skipped) {
     sycl::property_list queue_properties;
     double density_A_matrix = 0.8;
     fpType fp_zero = set_fp_value<fpType>()(0.f, 0.f);
     fpType fp_one = set_fp_value<fpType>()(1.f, 0.f);
     int nrows_A = 4, ncols_A = 6;
-    oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero;
-    oneapi::mkl::sparse::spmv_alg default_alg = oneapi::mkl::sparse::spmv_alg::default_alg;
-    oneapi::mkl::sparse::matrix_view default_A_view;
+    oneapi::math::index_base index_zero = oneapi::math::index_base::zero;
+    oneapi::math::sparse::spmv_alg default_alg = oneapi::math::sparse::spmv_alg::default_alg;
+    oneapi::math::sparse::matrix_view default_A_view;
     bool no_reset_data = false;
     bool no_scalars_on_device = false;
 
@@ -92,7 +92,7 @@ void test_helper_with_format_with_transpose(
     // Test index_base 1
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
-                         oneapi::mkl::index_base::one, transpose_val, fp_one, fp_zero, default_alg,
+                         oneapi::math::index_base::one, transpose_val, fp_one, fp_zero, default_alg,
                          default_A_view, default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Test non-default alpha
@@ -128,24 +128,24 @@ void test_helper_with_format_with_transpose(
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Lower triangular
-    oneapi::mkl::sparse::matrix_view triangular_A_view(
-        oneapi::mkl::sparse::matrix_descr::triangular);
+    oneapi::math::sparse::matrix_view triangular_A_view(
+        oneapi::math::sparse::matrix_descr::triangular);
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_A_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Upper triangular
-    triangular_A_view.uplo_view = oneapi::mkl::uplo::upper;
+    triangular_A_view.uplo_view = oneapi::math::uplo::upper;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_A_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Lower triangular unit diagonal
-    oneapi::mkl::sparse::matrix_view triangular_unit_A_view(
-        oneapi::mkl::sparse::matrix_descr::triangular);
-    triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit;
+    oneapi::math::sparse::matrix_view triangular_unit_A_view(
+        oneapi::math::sparse::matrix_descr::triangular);
+    triangular_unit_A_view.diag_view = oneapi::math::diag::unit;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg,
@@ -153,7 +153,7 @@ void test_helper_with_format_with_transpose(
                          no_scalars_on_device),
         num_passed, num_skipped);
     // Upper triangular unit diagonal
-    triangular_A_view.uplo_view = oneapi::mkl::uplo::upper;
+    triangular_A_view.uplo_view = oneapi::math::uplo::upper;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg,
@@ -161,28 +161,28 @@ void test_helper_with_format_with_transpose(
                          no_scalars_on_device),
         num_passed, num_skipped);
     // Lower symmetric
-    oneapi::mkl::sparse::matrix_view symmetric_view(oneapi::mkl::sparse::matrix_descr::symmetric);
+    oneapi::math::sparse::matrix_view symmetric_view(oneapi::math::sparse::matrix_descr::symmetric);
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Upper symmetric
-    symmetric_view.uplo_view = oneapi::mkl::uplo::upper;
+    symmetric_view.uplo_view = oneapi::math::uplo::upper;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Lower hermitian
-    oneapi::mkl::sparse::matrix_view hermitian_view(oneapi::mkl::sparse::matrix_descr::hermitian);
+    oneapi::math::sparse::matrix_view hermitian_view(oneapi::math::sparse::matrix_descr::hermitian);
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, hermitian_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Upper hermitian
-    hermitian_view.uplo_view = oneapi::mkl::uplo::upper;
+    hermitian_view.uplo_view = oneapi::math::uplo::upper;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, nrows_A, ncols_A, density_A_matrix,
                          index_zero, transpose_val, fp_one, fp_zero, default_alg, hermitian_view,
@@ -228,11 +228,11 @@ template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper_with_format(
     testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev,
     sparse_matrix_format_t format,
-    const std::vector<oneapi::mkl::sparse::spmv_alg>& non_default_algorithms, int& num_passed,
+    const std::vector<oneapi::math::sparse::spmv_alg>& non_default_algorithms, int& num_passed,
     int& num_skipped) {
-    std::vector<oneapi::mkl::transpose> transpose_vals{ oneapi::mkl::transpose::nontrans,
-                                                        oneapi::mkl::transpose::trans,
-                                                        oneapi::mkl::transpose::conjtrans };
+    std::vector<oneapi::math::transpose> transpose_vals{ oneapi::math::transpose::nontrans,
+                                                         oneapi::math::transpose::trans,
+                                                         oneapi::math::transpose::conjtrans };
     for (auto transpose_A : transpose_vals) {
         test_helper_with_format_with_transpose<fpType>(test_functor_i32, test_functor_i64, dev,
                                                        format, non_default_algorithms, transpose_A,
@@ -255,13 +255,13 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6
                  sycl::device* dev, int& num_passed, int& num_skipped) {
     test_helper_with_format<fpType>(
         test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR,
-        { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::csr_alg1,
-          oneapi::mkl::sparse::spmv_alg::csr_alg2, oneapi::mkl::sparse::spmv_alg::csr_alg3 },
+        { oneapi::math::sparse::spmv_alg::no_optimize_alg, oneapi::math::sparse::spmv_alg::csr_alg1,
+          oneapi::math::sparse::spmv_alg::csr_alg2, oneapi::math::sparse::spmv_alg::csr_alg3 },
         num_passed, num_skipped);
     test_helper_with_format<fpType>(
         test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO,
-        { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::coo_alg1,
-          oneapi::mkl::sparse::spmv_alg::coo_alg2 },
+        { oneapi::math::sparse::spmv_alg::no_optimize_alg, oneapi::math::sparse::spmv_alg::coo_alg1,
+          oneapi::math::sparse::spmv_alg::coo_alg2 },
         num_passed, num_skipped);
 }
 
@@ -270,8 +270,8 @@ template <typename fpType, typename intType>
 void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType* ia,
                                  const intType* ja, const fpType* a, intType a_nrows,
                                  intType a_ncols, intType a_nnz, intType indexing,
-                                 oneapi::mkl::transpose opA, fpType alpha, fpType beta,
-                                 const fpType* x, oneapi::mkl::sparse::matrix_view A_view,
+                                 oneapi::math::transpose opA, fpType alpha, fpType beta,
+                                 const fpType* x, oneapi::math::sparse::matrix_view A_view,
                                  fpType* y_ref) {
     std::size_t a_nrows_u = static_cast<std::size_t>(a_nrows);
     std::size_t a_ncols_u = static_cast<std::size_t>(a_ncols);
diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp b/tests/unit_tests/sparse_blas/include/test_spsv.hpp
index 94f5eacb1..54221e3e7 100644
--- a/tests/unit_tests/sparse_blas/include/test_spsv.hpp
+++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp
@@ -26,8 +26,8 @@
 #include <CL/sycl.hpp>
 #endif
 
-#include "oneapi/mkl.hpp"
-#include "oneapi/mkl/detail/config.hpp"
+#include "oneapi/math.hpp"
+#include "oneapi/math/detail/config.hpp"
 
 #include "common_sparse_reference.hpp"
 #include "test_common.hpp"
@@ -49,18 +49,20 @@
 template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64,
                              sycl::device* dev, sparse_matrix_format_t format,
-                             oneapi::mkl::transpose transpose_val, int& num_passed,
+                             oneapi::math::transpose transpose_val, int& num_passed,
                              int& num_skipped) {
     sycl::property_list queue_properties;
     double density_A_matrix = 0.144;
     fpType alpha = set_fp_value<fpType>()(1.f, 0.f);
     int m = 277;
-    oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero;
-    oneapi::mkl::sparse::spsv_alg default_alg = oneapi::mkl::sparse::spsv_alg::default_alg;
-    oneapi::mkl::sparse::spsv_alg no_optimize_alg = oneapi::mkl::sparse::spsv_alg::no_optimize_alg;
-    oneapi::mkl::sparse::matrix_view default_A_view(oneapi::mkl::sparse::matrix_descr::triangular);
-    oneapi::mkl::sparse::matrix_view upper_A_view(oneapi::mkl::sparse::matrix_descr::triangular);
-    upper_A_view.uplo_view = oneapi::mkl::uplo::upper;
+    oneapi::math::index_base index_zero = oneapi::math::index_base::zero;
+    oneapi::math::sparse::spsv_alg default_alg = oneapi::math::sparse::spsv_alg::default_alg;
+    oneapi::math::sparse::spsv_alg no_optimize_alg =
+        oneapi::math::sparse::spsv_alg::no_optimize_alg;
+    oneapi::math::sparse::matrix_view default_A_view(
+        oneapi::math::sparse::matrix_descr::triangular);
+    oneapi::math::sparse::matrix_view upper_A_view(oneapi::math::sparse::matrix_descr::triangular);
+    upper_A_view.uplo_view = oneapi::math::uplo::upper;
     bool no_reset_data = false;
     bool no_scalars_on_device = false;
 
@@ -89,7 +91,7 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes
     // Test index_base 1
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, m, density_A_matrix,
-                         oneapi::mkl::index_base::one, transpose_val, alpha, default_alg,
+                         oneapi::math::index_base::one, transpose_val, alpha, default_alg,
                          default_A_view, default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Test upper triangular matrix
@@ -99,16 +101,16 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes
                          no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Test lower triangular unit diagonal matrix
-    oneapi::mkl::sparse::matrix_view triangular_unit_A_view(
-        oneapi::mkl::sparse::matrix_descr::triangular);
-    triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit;
+    oneapi::math::sparse::matrix_view triangular_unit_A_view(
+        oneapi::math::sparse::matrix_descr::triangular);
+    triangular_unit_A_view.diag_view = oneapi::math::diag::unit;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, m, density_A_matrix, index_zero,
                          transpose_val, alpha, default_alg, triangular_unit_A_view,
                          default_properties, no_reset_data, no_scalars_on_device),
         num_passed, num_skipped);
     // Test upper triangular unit diagonal matrix
-    triangular_unit_A_view.uplo_view = oneapi::mkl::uplo::upper;
+    triangular_unit_A_view.uplo_view = oneapi::math::uplo::upper;
     EXPECT_TRUE_OR_FUTURE_SKIP(
         test_functor_i32(dev, queue_properties, format, m, density_A_matrix, index_zero,
                          transpose_val, alpha, default_alg, triangular_unit_A_view,
@@ -174,7 +176,7 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes
  */
 template <typename fpType, typename testFunctorI32, typename testFunctorI64>
 void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64,
-                 sycl::device* dev, oneapi::mkl::transpose transpose_val, int& num_passed,
+                 sycl::device* dev, oneapi::math::transpose transpose_val, int& num_passed,
                  int& num_skipped) {
     test_helper_with_format<fpType>(test_functor_i32, test_functor_i64, dev,
                                     sparse_matrix_format_t::CSR, transpose_val, num_passed,
@@ -188,8 +190,8 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6
 template <typename fpType, typename intType>
 void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType* ia,
                                  const intType* ja, const fpType* a, intType m, intType nnz,
-                                 intType indexing, oneapi::mkl::transpose opA, const fpType* x,
-                                 fpType alpha, oneapi::mkl::sparse::matrix_view A_view,
+                                 intType indexing, oneapi::math::transpose opA, const fpType* x,
+                                 fpType alpha, oneapi::math::sparse::matrix_view A_view,
                                  fpType* y_ref) {
     std::size_t mu = static_cast<std::size_t>(m);
     auto dense_opa = sparse_to_dense(format, ia, ja, a, mu, mu, static_cast<std::size_t>(nnz),
@@ -202,8 +204,8 @@ void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType* i
     //
     // Compute each element of the reference one after the other starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix.
     // A matrix is considered lowered if it is lower and not transposed or upper and transposed.
-    const bool is_lower =
-        (A_view.uplo_view == oneapi::mkl::uplo::lower) == (opA == oneapi::mkl::transpose::nontrans);
+    const bool is_lower = (A_view.uplo_view == oneapi::math::uplo::lower) ==
+                          (opA == oneapi::math::transpose::nontrans);
     for (std::size_t row = 0; row < mu; row++) {
         std::size_t uplo_row = is_lower ? row : (mu - 1 - row);
         fpType rhs = alpha * x[uplo_row];
diff --git a/tests/unit_tests/sparse_blas/source/CMakeLists.txt b/tests/unit_tests/sparse_blas/source/CMakeLists.txt
index a9271bfa2..c0e86cbae 100644
--- a/tests/unit_tests/sparse_blas/source/CMakeLists.txt
+++ b/tests/unit_tests/sparse_blas/source/CMakeLists.txt
@@ -41,9 +41,9 @@ if (BUILD_SHARED_LIBS)
     if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
         add_sycl_to_target(TARGET spblas_source_rt SOURCES ${SPBLAS_SOURCES})
     else ()
-        target_link_libraries(spblas_source_rt PUBLIC ONEMKL::SYCL::SYCL)
+        target_link_libraries(spblas_source_rt PUBLIC ONEMATH::SYCL::SYCL)
     endif ()
-    target_link_libraries(spblas_source_rt PRIVATE onemkl_warnings)
+    target_link_libraries(spblas_source_rt PRIVATE onemath_warnings)
 endif ()
 
 add_library(spblas_source_ct OBJECT ${SPBLAS_SOURCES})
@@ -58,6 +58,6 @@ target_include_directories(spblas_source_ct
 if (USE_ADD_SYCL_TO_TARGET_INTEGRATION)
     add_sycl_to_target(TARGET spblas_source_ct SOURCES ${SPBLAS_SOURCES})
 else ()
-    target_link_libraries(spblas_source_ct PUBLIC ONEMKL::SYCL::SYCL)
+    target_link_libraries(spblas_source_ct PUBLIC ONEMATH::SYCL::SYCL)
 endif ()
-target_link_libraries(spblas_source_ct PRIVATE onemkl_warnings)
+target_link_libraries(spblas_source_ct PRIVATE onemath_warnings)
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp
index 50f0fb2e7..61f92d6cf 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp
@@ -30,12 +30,12 @@ namespace {
 template <typename fpType, typename intType>
 int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C,
-              double density_A_matrix, oneapi::mkl::index_base index,
-              oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A,
-              oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb,
-              intType ldc, oneapi::mkl::sparse::spmm_alg alg,
-              oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              double density_A_matrix, oneapi::math::index_base index,
+              oneapi::math::layout dense_matrix_layout, oneapi::math::transpose transpose_A,
+              oneapi::math::transpose transpose_B, fpType alpha, fpType beta, intType ldb,
+              intType ldc, oneapi::math::sparse::spmm_alg alg,
+              oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     if (test_scalar_on_device) {
         // Scalars on the device is not planned to be supported with the buffer API
@@ -51,9 +51,9 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
     }
     auto [opa_nrows, opa_ncols] = swap_if_transposed<std::size_t>(transpose_A, nrows_A, ncols_A);
     auto [opb_nrows, opb_ncols] = swap_if_transposed<std::int64_t>(transpose_B, opa_ncols, ncols_C);
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Input matrix
@@ -82,35 +82,36 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
     auto b_buf = make_buffer(b_host);
     auto c_buf = make_buffer(c_host);
 
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr;
-    oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr;
-    oneapi::mkl::sparse::spmm_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_matrix_handle_t B_handle = nullptr;
+    oneapi::math::sparse::dense_matrix_handle_t C_handle = nullptr;
+    oneapi::math::sparse::spmm_descr_t descr = nullptr;
     try {
         init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf,
                            ja_buf, a_buf);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows,
                       opb_ncols, ldb, dense_matrix_layout, b_buf);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &C_handle,
                       static_cast<std::int64_t>(opa_nrows), ncols_C, ldc, dense_matrix_layout,
                       c_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spmm_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B,
                       &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
                       workspace_size);
         sycl::buffer<std::uint8_t, 1> workspace_buf((sycl::range<1>(workspace_size)));
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmm_optimize, main_queue, transpose_A, transpose_B,
                       &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
                       workspace_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha,
                       A_view, A_handle, B_handle, &beta, C_handle, alg, descr);
 
         if (reset_data) {
@@ -136,18 +137,18 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
                             ja_buf, a_buf);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmm_buffer_size, main_queue, transpose_A,
                           transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg,
                           descr, workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_buf = sycl::buffer<std::uint8_t, 1>((sycl::range<1>(workspace_size_2)));
             }
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmm_optimize, main_queue, transpose_A, transpose_B,
                           &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
                           workspace_buf);
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha,
                           A_view, A_handle, B_handle, &beta, C_handle, alg, descr);
         }
     }
@@ -157,11 +158,11 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, B_handle, C_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmm_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -171,7 +172,7 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
         std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl;
         return 0;
     }
-    CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmm_descr, main_queue, descr);
+    CALL_RT_OR_CT(oneapi::math::sparse::release_spmm_descr, main_queue, descr);
     free_handles(main_queue, A_handle, B_handle, C_handle);
 
     // Compute reference.
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp
index 1db7c7a25..8f31bf705 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp
@@ -30,12 +30,12 @@ namespace {
 template <typename fpType, typename intType>
 int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C,
-              double density_A_matrix, oneapi::mkl::index_base index,
-              oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A,
-              oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb,
-              intType ldc, oneapi::mkl::sparse::spmm_alg alg,
-              oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              double density_A_matrix, oneapi::math::index_base index,
+              oneapi::math::layout dense_matrix_layout, oneapi::math::transpose transpose_A,
+              oneapi::math::transpose transpose_B, fpType alpha, fpType beta, intType ldb,
+              intType ldc, oneapi::math::sparse::spmm_alg alg,
+              oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     sycl::queue main_queue(*dev, exception_handler_t(), queue_properties);
 
@@ -47,9 +47,9 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
     }
     auto [opa_nrows, opa_ncols] = swap_if_transposed<std::size_t>(transpose_A, nrows_A, ncols_A);
     auto [opb_nrows, opb_ncols] = swap_if_transposed<std::int64_t>(transpose_B, opa_ncols, ncols_C);
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Input matrix
@@ -106,37 +106,38 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
     }
 
     sycl::event ev_copy, ev_spmm;
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr;
-    oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr;
-    oneapi::mkl::sparse::spmm_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_matrix_handle_t B_handle = nullptr;
+    oneapi::math::sparse::dense_matrix_handle_t C_handle = nullptr;
+    oneapi::math::sparse::spmm_descr_t descr = nullptr;
     std::unique_ptr<std::uint8_t, UsmDeleter> workspace_usm(nullptr, UsmDeleter(main_queue));
     try {
         init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm,
                            ja_usm, a_usm);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows,
                       opb_ncols, ldb, dense_matrix_layout, b_usm);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &C_handle,
                       static_cast<std::int64_t>(opa_nrows), ncols_C, ldc, dense_matrix_layout,
                       c_usm);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spmm_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B,
                       &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
                       workspace_size);
         workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size);
 
         sycl::event ev_opt;
-        CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A,
+        CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spmm_optimize, main_queue, transpose_A,
                       transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
                       workspace_usm.get(), dependencies);
 
-        CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B,
+        CALL_RT_OR_CT(ev_spmm = oneapi::math::sparse::spmm, main_queue, transpose_A, transpose_B,
                       &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, { ev_opt });
 
         if (reset_data) {
@@ -170,20 +171,20 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
                             ja_usm, a_usm);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmm_buffer_size, main_queue, transpose_A,
                           transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg,
                           descr, workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size_2);
             }
 
-            CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A,
+            CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spmm_optimize, main_queue, transpose_A,
                           transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg,
                           descr, workspace_usm.get(), dependencies);
 
-            CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B,
-                          &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr,
-                          { ev_opt });
+            CALL_RT_OR_CT(ev_spmm = oneapi::math::sparse::spmm, main_queue, transpose_A,
+                          transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg,
+                          descr, { ev_opt });
         }
 
         ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), ev_spmm);
@@ -194,11 +195,11 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, B_handle, C_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmm_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -209,7 +210,7 @@ int test_spmm(sycl::device* dev, sycl::property_list queue_properties,
         return 0;
     }
     sycl::event ev_release_descr;
-    CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, descr,
+    CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmm_descr, main_queue, descr,
                   { ev_spmm });
     ev_release_descr.wait_and_throw();
     free_handles(main_queue, { ev_spmm }, A_handle, B_handle, C_handle);
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp
index 96328372d..ab3c45e7a 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp
@@ -30,10 +30,10 @@ namespace {
 template <typename fpType, typename intType>
 int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType nrows_A, intType ncols_A,
-              double density_A_matrix, oneapi::mkl::index_base index,
-              oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta,
-              oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              double density_A_matrix, oneapi::math::index_base index,
+              oneapi::math::transpose transpose_val, fpType alpha, fpType beta,
+              oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     if (test_scalar_on_device) {
         // Scalars on the device is not planned to be supported with the buffer API
@@ -45,9 +45,9 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         ncols_A = nrows_A;
     }
     auto [opa_nrows, opa_ncols] = swap_if_transposed<std::size_t>(transpose_val, nrows_A, ncols_A);
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Input matrix
@@ -75,32 +75,33 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
     auto x_buf = make_buffer(x_host);
     auto y_buf = make_buffer(y_host);
 
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::spmv_descr_t descr = nullptr;
     try {
         init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf,
                            ja_buf, a_buf);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle,
                       static_cast<std::int64_t>(x_host.size()), x_buf);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle,
                       static_cast<std::int64_t>(y_host.size()), y_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spmv_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha,
                       A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size);
         sycl::buffer<std::uint8_t, 1> workspace_buf((sycl::range<1>(workspace_size)));
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha, A_view,
-                      A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf);
+        CALL_RT_OR_CT(oneapi::math::sparse::spmv_optimize, main_queue, transpose_val, &alpha,
+                      A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmv, main_queue, transpose_val, &alpha, A_view,
                       A_handle, x_handle, &beta, y_handle, alg, descr);
 
         if (reset_data) {
@@ -126,17 +127,17 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
                             ja_buf, a_buf);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha,
                           A_view, A_handle, x_handle, &beta, y_handle, alg, descr,
                           workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_buf = sycl::buffer<std::uint8_t, 1>((sycl::range<1>(workspace_size_2)));
             }
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmv_optimize, main_queue, transpose_val, &alpha,
                           A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf);
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmv, main_queue, transpose_val, &alpha, A_view,
                           A_handle, x_handle, &beta, y_handle, alg, descr);
         }
     }
@@ -146,11 +147,11 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, x_handle, y_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmv_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -160,7 +161,7 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl;
         return 0;
     }
-    CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmv_descr, main_queue, descr);
+    CALL_RT_OR_CT(oneapi::math::sparse::release_spmv_descr, main_queue, descr);
     free_handles(main_queue, A_handle, x_handle, y_handle);
 
     // Compute reference.
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp
index c6159aaf4..5284313c9 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp
@@ -30,10 +30,10 @@ namespace {
 template <typename fpType, typename intType>
 int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType nrows_A, intType ncols_A,
-              double density_A_matrix, oneapi::mkl::index_base index,
-              oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta,
-              oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              double density_A_matrix, oneapi::math::index_base index,
+              oneapi::math::transpose transpose_val, fpType alpha, fpType beta,
+              oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     sycl::queue main_queue(*dev, exception_handler_t(), queue_properties);
 
@@ -41,9 +41,9 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         ncols_A = nrows_A;
     }
     auto [opa_nrows, opa_ncols] = swap_if_transposed<std::size_t>(transpose_val, nrows_A, ncols_A);
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Input matrix
@@ -99,36 +99,37 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
     }
 
     sycl::event ev_copy, ev_spmv;
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::spmv_descr_t descr = nullptr;
     std::unique_ptr<std::uint8_t, UsmDeleter> workspace_usm(nullptr, UsmDeleter(main_queue));
     try {
         init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm,
                            ja_usm, a_usm);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle,
                       static_cast<std::int64_t>(x_host.size()), x_usm);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle,
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle,
                       static_cast<std::int64_t>(y_host.size()), y_usm);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spmv_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val,
+        CALL_RT_OR_CT(oneapi::math::sparse::spmv_buffer_size, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                       y_handle, alg, descr, workspace_size);
         workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size);
 
         sycl::event ev_opt;
-        CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val,
+        CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spmv_optimize, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                       y_handle, alg, descr, workspace_usm.get(), dependencies);
 
-        CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val,
+        CALL_RT_OR_CT(ev_spmv = oneapi::math::sparse::spmv, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                       y_handle, alg, descr, { ev_opt });
 
@@ -163,18 +164,18 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
                             ja_usm, a_usm);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val,
+            CALL_RT_OR_CT(oneapi::math::sparse::spmv_buffer_size, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                           y_handle, alg, descr, workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size_2);
             }
 
-            CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val,
+            CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spmv_optimize, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                           y_handle, alg, descr, workspace_usm.get(), dependencies);
 
-            CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val,
+            CALL_RT_OR_CT(ev_spmv = oneapi::math::sparse::spmv, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr,
                           y_handle, alg, descr, { ev_opt });
         }
@@ -187,11 +188,11 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, x_handle, y_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmv_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -202,7 +203,7 @@ int test_spmv(sycl::device* dev, sycl::property_list queue_properties,
         return 0;
     }
     sycl::event ev_release_descr;
-    CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, descr,
+    CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spmv_descr, main_queue, descr,
                   { ev_spmv });
     ev_release_descr.wait_and_throw();
     free_handles(main_queue, { ev_spmv }, A_handle, x_handle, y_handle);
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp
index 19c237dc0..0fa162bac 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp
@@ -30,9 +30,9 @@ namespace {
 template <typename fpType, typename intType>
 int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType m, double density_A_matrix,
-              oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha,
-              oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha,
+              oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     if (test_scalar_on_device) {
         // Scalars on the device is not planned to be supported with the buffer API
@@ -40,10 +40,10 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
     }
     sycl::queue main_queue(*dev, exception_handler_t(), queue_properties);
 
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const std::size_t mu = static_cast<std::size_t>(m);
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Use a fixed seed for operations very sensitive to the input data
@@ -54,8 +54,8 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
     std::vector<fpType> a_host;
     // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix.
     const bool require_diagonal =
-        !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal &&
-          A_view.diag_view == oneapi::mkl::diag::unit);
+        !(A_view.type_view == oneapi::math::sparse::matrix_descr::diagonal &&
+          A_view.diag_view == oneapi::math::diag::unit);
     intType nnz =
         generate_random_matrix<fpType, intType>(format, m, m, density_A_matrix, indexing, ia_host,
                                                 ja_host, a_host, is_symmetric, require_diagonal);
@@ -80,29 +80,30 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
     auto x_buf = make_buffer(x_host);
     auto y_buf = make_buffer(y_host);
 
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::spsv_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::spsv_descr_t descr = nullptr;
     try {
         init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_buf);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_buf);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, m, x_buf);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle, m, y_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spsv_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha,
+        CALL_RT_OR_CT(oneapi::math::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha,
                       A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size);
         sycl::buffer<std::uint8_t, 1> workspace_buf((sycl::range<1>(workspace_size)));
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha, A_view,
-                      A_handle, x_handle, y_handle, alg, descr, workspace_buf);
+        CALL_RT_OR_CT(oneapi::math::sparse::spsv_optimize, main_queue, transpose_val, &alpha,
+                      A_view, A_handle, x_handle, y_handle, alg, descr, workspace_buf);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view,
+        CALL_RT_OR_CT(oneapi::math::sparse::spsv, main_queue, transpose_val, &alpha, A_view,
                       A_handle, x_handle, y_handle, alg, descr);
 
         if (reset_data) {
@@ -126,16 +127,16 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
             set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha,
+            CALL_RT_OR_CT(oneapi::math::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha,
                           A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_buf = sycl::buffer<std::uint8_t, 1>((sycl::range<1>(workspace_size_2)));
             }
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha,
+            CALL_RT_OR_CT(oneapi::math::sparse::spsv_optimize, main_queue, transpose_val, &alpha,
                           A_view, A_handle, x_handle, y_handle, alg, descr, workspace_buf);
 
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view,
+            CALL_RT_OR_CT(oneapi::math::sparse::spsv, main_queue, transpose_val, &alpha, A_view,
                           A_handle, x_handle, y_handle, alg, descr);
         }
     }
@@ -145,11 +146,11 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, x_handle, y_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spsv_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -159,7 +160,7 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
         std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl;
         return 0;
     }
-    CALL_RT_OR_CT(oneapi::mkl::sparse::release_spsv_descr, main_queue, descr);
+    CALL_RT_OR_CT(oneapi::math::sparse::release_spsv_descr, main_queue, descr);
     free_handles(main_queue, A_handle, x_handle, y_handle);
 
     // Compute reference.
@@ -180,9 +181,9 @@ TEST_P(SparseSpsvBufferTests, RealSinglePrecision) {
     using fpType = float;
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -195,9 +196,9 @@ TEST_P(SparseSpsvBufferTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -209,11 +210,11 @@ TEST_P(SparseSpsvBufferTests, ComplexSinglePrecision) {
     using fpType = std::complex<float>;
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::conjtrans, num_passed, num_skipped);
+                        oneapi::math::transpose::conjtrans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -226,11 +227,11 @@ TEST_P(SparseSpsvBufferTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::conjtrans, num_passed, num_skipped);
+                        oneapi::math::transpose::conjtrans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp
index 68023591b..398595480 100644
--- a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp
+++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp
@@ -30,16 +30,16 @@ namespace {
 template <typename fpType, typename intType>
 int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
               sparse_matrix_format_t format, intType m, double density_A_matrix,
-              oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha,
-              oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view,
-              const std::set<oneapi::mkl::sparse::matrix_property>& matrix_properties,
+              oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha,
+              oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::matrix_view A_view,
+              const std::set<oneapi::math::sparse::matrix_property>& matrix_properties,
               bool reset_data, bool test_scalar_on_device) {
     sycl::queue main_queue(*dev, exception_handler_t(), queue_properties);
 
-    intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1;
+    intType indexing = (index == oneapi::math::index_base::zero) ? 0 : 1;
     const std::size_t mu = static_cast<std::size_t>(m);
     const bool is_symmetric =
-        matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) !=
+        matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) !=
         matrix_properties.cend();
 
     // Use a fixed seed for operations very sensitive to the input data
@@ -50,8 +50,8 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
     std::vector<fpType> a_host;
     // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix.
     const bool require_diagonal =
-        !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal &&
-          A_view.diag_view == oneapi::mkl::diag::unit);
+        !(A_view.type_view == oneapi::math::sparse::matrix_descr::diagonal &&
+          A_view.diag_view == oneapi::math::diag::unit);
     intType nnz =
         generate_random_matrix<fpType, intType>(format, m, m, density_A_matrix, indexing, ia_host,
                                                 ja_host, a_host, is_symmetric, require_diagonal);
@@ -100,33 +100,34 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
     }
 
     sycl::event ev_copy, ev_spsv;
-    oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
-    oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
-    oneapi::mkl::sparse::spsv_descr_t descr = nullptr;
+    oneapi::math::sparse::matrix_handle_t A_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
+    oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
+    oneapi::math::sparse::spsv_descr_t descr = nullptr;
     std::unique_ptr<std::uint8_t, UsmDeleter> workspace_usm(nullptr, UsmDeleter(main_queue));
     try {
         init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm);
         for (auto property : matrix_properties) {
-            CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property);
+            CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle,
+                          property);
         }
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_usm);
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_usm);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, m, x_usm);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle, m, y_usm);
 
-        CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr);
+        CALL_RT_OR_CT(oneapi::math::sparse::init_spsv_descr, main_queue, &descr);
 
         std::size_t workspace_size = 0;
-        CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val,
+        CALL_RT_OR_CT(oneapi::math::sparse::spsv_buffer_size, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                       workspace_size);
         workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size);
 
         sycl::event ev_opt;
-        CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val,
+        CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spsv_optimize, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                       workspace_usm.get(), dependencies);
 
-        CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val,
+        CALL_RT_OR_CT(ev_spsv = oneapi::math::sparse::spsv, main_queue, transpose_val,
                       alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                       { ev_opt });
 
@@ -159,18 +160,18 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
             set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm);
 
             std::size_t workspace_size_2 = 0;
-            CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val,
+            CALL_RT_OR_CT(oneapi::math::sparse::spsv_buffer_size, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                           workspace_size_2);
             if (workspace_size_2 > workspace_size) {
                 workspace_usm = malloc_device_uptr<std::uint8_t>(main_queue, workspace_size_2);
             }
 
-            CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val,
+            CALL_RT_OR_CT(ev_opt = oneapi::math::sparse::spsv_optimize, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                           workspace_usm.get(), dependencies);
 
-            CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val,
+            CALL_RT_OR_CT(ev_spsv = oneapi::math::sparse::spsv, main_queue, transpose_val,
                           alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr,
                           { ev_opt });
         }
@@ -183,11 +184,11 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
         print_error_code(e);
         return 0;
     }
-    catch (const oneapi::mkl::unimplemented& e) {
+    catch (const oneapi::math::unimplemented& e) {
         wait_and_free_handles(main_queue, A_handle, x_handle, y_handle);
         if (descr) {
             sycl::event ev_release_descr;
-            CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue,
+            CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spsv_descr, main_queue,
                           descr);
             ev_release_descr.wait();
         }
@@ -198,7 +199,7 @@ int test_spsv(sycl::device* dev, sycl::property_list queue_properties,
         return 0;
     }
     sycl::event ev_release_descr;
-    CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, descr,
+    CALL_RT_OR_CT(ev_release_descr = oneapi::math::sparse::release_spsv_descr, main_queue, descr,
                   { ev_spsv });
     ev_release_descr.wait_and_throw();
     free_handles(main_queue, { ev_spsv }, A_handle, x_handle, y_handle);
@@ -221,9 +222,9 @@ TEST_P(SparseSpsvUsmTests, RealSinglePrecision) {
     using fpType = float;
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -236,9 +237,9 @@ TEST_P(SparseSpsvUsmTests, RealDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -250,11 +251,11 @@ TEST_P(SparseSpsvUsmTests, ComplexSinglePrecision) {
     using fpType = std::complex<float>;
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::conjtrans, num_passed, num_skipped);
+                        oneapi::math::transpose::conjtrans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
@@ -267,11 +268,11 @@ TEST_P(SparseSpsvUsmTests, ComplexDoublePrecision) {
     CHECK_DOUBLE_ON_DEVICE(GetParam());
     int num_passed = 0, num_skipped = 0;
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::nontrans, num_passed, num_skipped);
+                        oneapi::math::transpose::nontrans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::trans, num_passed, num_skipped);
+                        oneapi::math::transpose::trans, num_passed, num_skipped);
     test_helper<fpType>(test_spsv<fpType, int32_t>, test_spsv<fpType, std::int64_t>, GetParam(),
-                        oneapi::mkl::transpose::conjtrans, num_passed, num_skipped);
+                        oneapi::math::transpose::conjtrans, num_passed, num_skipped);
     if (num_skipped > 0) {
         // Mark that some tests were skipped
         GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped
diff --git a/third-party-programs/THIRD-PARTY-PROGRAMS b/third-party-programs/THIRD-PARTY-PROGRAMS
index fd462fa83..a75bbd2fe 100644
--- a/third-party-programs/THIRD-PARTY-PROGRAMS
+++ b/third-party-programs/THIRD-PARTY-PROGRAMS
@@ -1,4 +1,4 @@
-Intel® oneAPI Math Kernel Library (oneMKL) interfaces
+oneAPI Math Library (oneMath)
 
 This file contains the list of third party software (“third party programs”)
 contained in the Intel software and their required notices and/or license terms.